# In this script, I load exchange data from DATRAS and calculate catch of cod
# and flounder in unit kg/km^2 (with TVL gear), by correcting for gear
# dimensions, sweep length and trawl speed, following Orio et al. (2017). I
# also add oxygen, temperature and depth covariates, which I use for modelling
# this biomass index.
# Script setup: clear the workspace and load packages.
# NOTE(review): rm(list = ls()) only clears the global environment, not loaded
# packages or options; kept for interactive use but do not rely on it.
rm(list = ls())
library(tidyverse); theme_set(theme_light(base_size = 12))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork)
library(janitor)
library(icesDatras)
library(mapdata)
# The duplicated second library(patchwork) call has been removed.
# Load order matters below: raster (and others) mask dplyr functions, which is
# why the script calls dplyr::select() explicitly throughout.
library(rgdal)
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
library(sdmTMB) # remotes::install_github("pbs-assess/sdmTMB")
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)
#library(gganimate)
# Base map for plotting
world <- ne_countries(scale = "medium", returnclass = "sf")
# Specify map ranges (lat/lon bounding box for the study area);
# use <- assignment, one per line, instead of chained `=` assignments
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22
map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")
# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))
# Transform our map into UTM zone 33 (EPSG:32633), the projected coordinate
# system we fit the models in. (Note: UTM is conformal, not equal-area, but it
# gives near-constant distance units over this small region.)
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)
#ggplot(swe_coast_proj) + geom_sf()
# Read HH (haul) data.
# The getDATRAS() downloads are slow, so the results were cached to CSV once;
# the download/write lines are kept (commented) for provenance.
# bits_hh <- getDATRAS(record = "HH", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_hh, "data/DATRAS_exchange/bits_hh.csv")
bits_hh <- read.csv("data/DATRAS_exchange/bits_hh.csv")
# Read HL (length-based catch) data
# bits_hl <- getDATRAS(record = "HL", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_hl, "data/DATRAS_exchange/bits_hl.csv")
bits_hl <- read.csv("data/DATRAS_exchange/bits_hl.csv")
# Read CA (age/individual) data
# bits_ca <- getDATRAS(record = "CA", survey = "BITS", years = 1991:2020, quarters = 4)
# write.csv(bits_ca, "data/DATRAS_exchange/bits_ca.csv")
bits_ca <- read.csv("data/DATRAS_exchange/bits_ca.csv")
# Read gear standardization data (semicolon-separated, latin1 encoded)
#sweep <- read.csv("data/from_ale/sweep_9116.csv", sep = ";", dec = ",", fileEncoding = "latin1")
sweep <- read.csv("data/from_ale/sweep_9118_ml.csv", sep = ";", fileEncoding = "latin1")
# Before creating a new ID, make sure that countries and ship names use the
# same format in the exchange data and the sweep file
sort(unique(sweep$Ship))
#> [1] "26HF" "ATL" "ATLD" "BAL" "BALL" "BPE" "CEV" "CLP" "CLV" "COML"
#> [11] "DAN2" "DANS" "DAR" "GDY" "HAF" "KOH" "KOOT" "MON" "MONL" "SOL"
#> [21] "SOL2" "VSH" "ZBA"
sort(unique(bits_hh$Ship))
#> [1] "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77SE" "AA36" "ESLF"
#> [11] "ESTM" "LTDA" "RUJB" "RUNT"
sort(unique(bits_hl$Ship))
#> [1] "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77SE" "AA36" "ESLF"
#> [11] "ESTM" "LTDA" "RUJB" "RUNT"
# Change back to the old Ship name standard used in the sweep file
# https://vocab.ices.dk/?ref=315
# Assumptions:
# SOL is Solea on ICES links above, and SOL1 is the older one of the two SOLs (1 and 2)
# DAN is Dana
# Exploratory checks behind the recode choices below (kept for reference):
# sweep %>% filter(Ship == "DANS") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "67BC") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "26D4") %>% distinct(Year) # Strange that 26DF doesn't extend far back. Which ship did the Danes use? Ok, I have no Danish data that old.
# bits_hh %>% filter(Country == "DK") %>% distinct(Year)
# The same ship recoding (new DATRAS codes -> old codes used in the sweep
# file) is needed for all three exchange files (HH, HL, CA), so do it once in
# a helper instead of three copy-pasted blocks.
recode_ship <- function(df) {
  df %>%
    mutate(
      Ship2 = fct_recode(Ship,
        "SOL" = "06S1",
        "SOL2" = "06SL",
        "DAN2" = "26D4",
        "HAF" = "26HF",
        "HAF" = "26HI",
        # NOTE(review): "67BC" is mapped twice (to both HAF and BAL), so only
        # one of the two recodes can take effect. Kept exactly as in the
        # original code, but confirm which ship 67BC really is.
        "HAF" = "67BC",
        "BAL" = "67BC",
        "ARG" = "77AR",
        "77SE" = "77SE",
        "AA36" = "AA36",
        "KOOT" = "ESLF",
        "KOH" = "ESTM",
        "DAR" = "LTDA",
        "ATLD" = "RUJB",
        "ATL" = "RUNT"),
      Ship2 = as.character(Ship2)) %>%
    # The sweep file lists both "BAL" and "BALL"; Latvian (LV) hauls with
    # Ship2 == "BAL" are recoded to "BALL" to match it
    mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))
}
bits_hh <- recode_ship(bits_hh)
bits_hl <- recode_ship(bits_hl)
bits_ca <- recode_ship(bits_ca)
# Ok, which ships are missing in the sweep data?
unique(bits_hh$Ship3)[!unique(bits_hh$Ship3) %in% unique(sweep$Ship)]
#> [1] "ARG" "AA36" "77SE"
# Swedish Ships and unidentified ships are NOT in the Sweep data
# BUG FIX: the sweep data has no "Ship3" column (only "Ship"), so the original
# `unique(sweep$Ship3)` returned NULL and this check was vacuous. Compare
# sweep$Ship against the recoded exchange-data ship names instead.
unique(sweep$Ship)[!unique(sweep$Ship) %in% unique(bits_hh$Ship3)]
# Now check which country codes are used (3-letter in sweep, 2-letter in DATRAS)
sort(unique(sweep$Country))
#> [1] "DEN" "EST" "GFR" "LAT" "LTU" "POL" "RUS" "SWE"
sort(unique(bits_hh$Country))
#> [1] "DE" "DK" "EE" "LT" "LV" "PL" "RU" "SE"
# https://www.nationsonline.org/oneworld/country_code_list.htm#E
# Harmonize the 2-letter country codes in the DATRAS exchange data to the
# 3-letter codes used in the sweep file. Needed for all three exchange files,
# so do it once in a helper instead of three copy-pasted blocks.
recode_country <- function(df) {
  df %>%
    mutate(
      Country = fct_recode(Country,
        "DEN" = "DK",
        "EST" = "EE",
        "GFR" = "DE",
        "LAT" = "LV",
        "LTU" = "LT",
        "POL" = "PL",
        "RUS" = "RU",
        "SWE" = "SE"),
      Country = as.character(Country))
}
bits_hh <- recode_country(bits_hh)
bits_hl <- recode_country(bits_hl)
bits_ca <- recode_country(bits_ca)
# Gear? Are the same gear codes used in the exchange and sweep data?
sort(unique(bits_hh$Gear))
#> [1] "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "LBT" "PEL" "SON" "TVL" "TVS"
sort(unique(bits_hl$Gear))
#> [1] "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "LBT" "PEL" "SON" "TVL" "TVS"
sort(unique(sweep$Gear))
#> [1] "CAM" "CHP" "DT" "EGY" "ESB" "EXP" "GRT" "H20" "HAK" "LBT" "LPT" "P20"
#> [13] "PEL" "SON" "TVL" "TVS"
# Which gears are NOT in the sweep data? (These get their fishing-line values
# filled in manually further down.)
unique(bits_hl$Gear)[!unique(bits_hl$Gear) %in% unique(sweep$Gear)]
#> [1] "FOT" "GOV"
# Create haul identifiers. IDx uses the raw DATRAS Ship code (it matches rows
# across the exchange files); haul.id uses the recoded Ship3 so it can also be
# matched against the sweep file. One helper each replaces the five
# copy-pasted mutate() calls.
make_idx <- function(df) {
  mutate(df, IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
}
make_haul_id <- function(df) {
  mutate(df, haul.id = paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":"))
}
bits_ca <- make_idx(bits_ca)
bits_hl <- make_idx(bits_hl)
bits_hh <- make_idx(bits_hh)
# Works like a haul-id (one row per IDx in the HH data):
# bits_hh %>% group_by(IDx) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)
bits_hl <- make_haul_id(bits_hl)
bits_hh <- make_haul_id(bits_hh)
# Select just valid, additional and no oxygen hauls (HaulVal A/N/V)
bits_hh <- bits_hh %>%
#filter(!Country == "SWE") %>% # I'll deal with Sweden later...
filter(HaulVal %in% c("A","N","V"))
# Add ICES rectangle from the shooting position
bits_hh$Rect <- mapplots::ices.rect2(lon = bits_hh$ShootLong, lat = bits_hh$ShootLat)
# Add ICES subdivisions by overlaying the shooting positions on the ICES
# areas shapefile (point-in-polygon).
# NOTE(review): shapefile()/SpatialPoints()/over() come from the retired
# raster/sp/rgdal stack; consider migrating to sf (st_read + st_join).
shape <- shapefile("data/ICES_StatRec_mapto_ICES_Areas/StatRec_map_Areas_Full_20170124.shp")
pts <- SpatialPoints(cbind(bits_hh$ShootLong, bits_hh$ShootLat),
proj4string = CRS(proj4string(shape)))
#> Warning in proj4string(shape): CRS object has comment, which is lost in output
bits_hh$SD <- over(pts, shape)$Area_27
# Rename subdivisions to the more common names and do some more filtering (by sub div and area)
sort(unique(bits_hh$SD))
#> [1] "3.a.20" "3.a.21" "3.b.23" "3.c.22" "3.d.24" "3.d.25"
#> [7] "3.d.26" "3.d.27" "3.d.28.1" "3.d.28.2" "3.d.29"
# Note: 28.1 and 28.2 are deliberately collapsed into "28". The "3.d.30"
# recode is defensive — that level is absent in these data, which is what
# triggers the (harmless) "Unknown levels" warning below.
bits_hh <- bits_hh %>%
mutate(SD = factor(SD),
SD = fct_recode(SD,
"20" = "3.a.20",
"21" = "3.a.21",
"22" = "3.c.22",
"23" = "3.b.23",
"24" = "3.d.24",
"25" = "3.d.25",
"26" = "3.d.26",
"27" = "3.d.27",
"28" = "3.d.28.1",
"28" = "3.d.28.2",
"29" = "3.d.29",
"30" = "3.d.30"),
SD = as.character(SD))
#> Warning: Problem with `mutate()` input `SD`.
#> ℹ Unknown levels in `f`: 3.d.30
#> ℹ Input `SD` is `fct_recode(...)`.
#> Warning: Unknown levels in `f`: 3.d.30
# Now add the fishing line information from the sweep file (we need that later
# to standardize based on gear geometry). We add it to the HH data and then
# transfer it to the other exchange data files when left_joining.
# Check which Fishing lines I have in the sweep data:
fishing_line <- sweep %>% group_by(Gear) %>% distinct(Fishing.line)
# sweep %>% group_by(Gear) %>% distinct(Fishing.line)
# bits_hh %>% group_by(Gear) %>% distinct(Fishing.line)
# Join by gear; the key is now explicit instead of relying on the natural join.
# NOTE(review): if a gear has more than one distinct Fishing.line in the sweep
# file this join duplicates hauls — verify fishing_line has one row per Gear.
bits_hh <- left_join(bits_hh, fishing_line, by = "Gear")
bits_hh$Fishing.line <- as.numeric(bits_hh$Fishing.line)
# Which gears do not have a fishing line? Use -9 as an explicit "missing" code
bits_hh$Fishing.line[is.na(bits_hh$Fishing.line)] <- -9
bits_hh %>% filter(Fishing.line == -9) %>% distinct(Gear)
#> Gear
#> 1 FOT
#> 2 GOV
#> 3 ESB
#> 4 GRT
# FROM the index files (Orio)
# FOT has 83
# GOV has 160
# ESB ??
# GRT ??
# Add these values (one case_when instead of two chained ifelse mutates):
bits_hh <- bits_hh %>%
  mutate(Fishing.line = case_when(Gear == "FOT" ~ 83,
                                  Gear == "GOV" ~ 160,
                                  TRUE ~ Fishing.line))
# Now select the hauls in the HH data when subsetting the HL data
bits_hl <- bits_hl %>%
filter(haul.id %in% bits_hh$haul.id)
# Match columns from the HH data to the HL and CA data
sort(unique(bits_hh$SD))
#> [1] "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
sort(colnames(bits_hh))
#> [1] "BotCurDir" "BotCurSpeed" "BotSal"
#> [4] "BotTemp" "Buoyancy" "BySpecRecCode"
#> [7] "CodendMesh" "Country" "DataType"
#> [10] "DateofCalculation" "Day" "DayNight"
#> [13] "Depth" "DepthStratum" "Distance"
#> [16] "DoorSpread" "DoorSurface" "DoorType"
#> [19] "DoorWgt" "Fishing.line" "Gear"
#> [22] "GearEx" "GroundSpeed" "haul.id"
#> [25] "HaulDur" "HaulLat" "HaulLong"
#> [28] "HaulNo" "HaulVal" "HydroStNo"
#> [31] "IDx" "KiteDim" "MaxTrawlDepth"
#> [34] "MinTrawlDepth" "Month" "Netopening"
#> [37] "PelSampType" "Quarter" "RecordType"
#> [40] "Rect" "Rigging" "SD"
#> [43] "SecchiDepth" "Ship" "Ship2"
#> [46] "Ship3" "ShootLat" "ShootLong"
#> [49] "SpeedWater" "StatRec" "StdSpecRecCode"
#> [52] "StNo" "SurCurDir" "SurCurSpeed"
#> [55] "SurSal" "SurTemp" "Survey"
#> [58] "SweepLngt" "SwellDir" "SwellHeight"
#> [61] "ThClineDepth" "ThermoCline" "Tickler"
#> [64] "TidePhase" "TideSpeed" "TimeShot"
#> [67] "TowDir" "Turbidity" "WarpDen"
#> [70] "Warpdia" "Warplngt" "WgtGroundRope"
#> [73] "WindDir" "WindSpeed" "WingSpread"
#> [76] "X" "Year"
# Keep only the HH columns needed downstream (dplyr::select is used explicitly
# because raster masks select()). The HL haul.id is dropped first so the join
# does not create haul.id.x/haul.id.y duplicates.
bits_hh_merge <- bits_hh %>%
dplyr::select(SD, Rect, HaulVal, StdSpecRecCode, BySpecRecCode, Fishing.line,
DataType, HaulDur, GroundSpeed, haul.id, IDx, ShootLat, ShootLong)
bits_hl <- left_join(dplyr::select(bits_hl, -haul.id), bits_hh_merge, by = "IDx")
bits_ca <- left_join(bits_ca, bits_hh_merge, by = "IDx")
# Now filter the subdivisions I want (SD 24-28) from all data sets
bits_hh <- bits_hh %>% filter(SD %in% c(24, 25, 26, 27, 28))
bits_hl <- bits_hl %>% filter(SD %in% c(24, 25, 26, 27, 28))
bits_ca <- bits_ca %>% filter(SD %in% c(24, 25, 26, 27, 28))
# Subset the length (HL) data to one species at a time. Two SpecCode values
# occur per species in the data; comparison works for both the integer and
# character representations.
hlcod <- bits_hl %>%
  mutate(Species = "Gadus morhua") %>%
  filter(SpecCode %in% c("126436", "164712"))
hlfle <- bits_hl %>%
  mutate(Species = "Platichthys flesus") %>%
  filter(SpecCode %in% c("127141", "172894"))
# Find common columns in the HH and HL data (here already subset by species)
comcol <- intersect(names(hlcod), names(bits_hh))
# What is the proportion of zero-catch hauls?
# Here we don't have zero catches yet — the HL records only contain hauls
# where the species was recorded
hlcod %>%
group_by(haul.id, Year) %>%
summarise(CPUEun_haul = sum(HLNoAtLngt)) %>%
ungroup() %>%
mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
distinct(zero_catch)
#> # A tibble: 1 x 1
#> zero_catch
#> <chr>
#> 1 N
# Cod: Add 0s and then remove lines with SpecVal = 0 (first NA because we don't have a match in the HH, then make them 0 later)
# The full_join on the common columns adds one NA row per haul without cod
hlcod0 <- full_join(hlcod, bits_hh[, comcol], by = comcol)
# No zeroes yet — the added hauls have NA, not 0
hlcod0 %>%
group_by(haul.id, Year) %>%
summarise(CPUEun_haul = sum(HLNoAtLngt)) %>%
ungroup() %>%
mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
distinct(zero_catch)
#> # A tibble: 1 x 1
#> zero_catch
#> <lgl>
#> 1 NA
# Flag the HH-only rows (NA SpecVal) as zero-catch hauls, then drop invalid
# records (SpecVal == "0")
hlcod0$SpecVal[is.na(hlcod0$SpecVal)] <- "zeroCatch"
hlcod0$SpecVal <- factor(hlcod0$SpecVal)
hlcod0 <- hlcod0 %>% filter(!SpecVal == "0")
# Add species again after merge (the added zero rows have NA Species)
hlcod0$Species<-"Gadus morhua"
# Flounder: Add 0s, remove them if StdSpecRecCode !=1 and then remove lines with SpecVal = 0
hlfle0 <- full_join(hlfle, bits_hh[, comcol], by = comcol)
hlfle0 <- hlfle0[!(is.na(hlfle0$Species) & hlfle0$StdSpecRecCode != 1),]
hlfle0$SpecVal[is.na(hlfle0$SpecVal)] <- "zeroCatch"
hlfle0$SpecVal <- factor(hlfle0$SpecVal)
hlfle0 <- hlfle0 %>% filter(!SpecVal == "0")
hlfle0$Species<-"Platichthys flesus"
# Check number of hauls per species
hlcod0 %>% distinct(haul.id) %>% nrow()
#> [1] 4392
hlfle0 %>% distinct(haul.id) %>% nrow()
#> [1] 4373
# Calculate CPUE in numbers per hour for rows with SpecVal = 1:
# if DataType = C then CPUEun = HLNoAtLngt;
# if DataType = R then CPUEun = HLNoAtLngt/(HaulDur/60);
# if DataType = S then CPUEun = (HLNoAtLngt*SubFactor)/(HaulDur/60).
# If SpecVal = "zeroCatch" then CPUEun = 0; if SpecVal = 4 we need to decide
# (no length measurements, only total catch). Note that here we also add zero
# CPUE if SpecVal == "zeroCatch". Then, within each haul, I sum the CPUE of
# the same length classes if they were sampled with different subfactors or
# with different sexes.
# Cod
# CPUE in numbers per hour, following the DATRAS DataType convention:
#   DataType C: HLNoAtLngt is already standardized (used as is)
#   DataType R: raw numbers over haul duration (minutes) -> per hour
#   DataType S: subsampled, so scale by SubFactor, then per hour
#   SpecVal "zeroCatch": hauls we added with no catch -> CPUE 0
# Anything else (e.g. SpecVal 4: total catch only, no lengths) stays NA.
# case_when replaces the previous hard-to-read 4-level nested ifelse();
# the resulting values are identical.
hlcod0 <- hlcod0 %>%
  mutate(CPUEun = case_when(
    SpecVal == "1" & DataType == "C" ~ HLNoAtLngt,
    SpecVal == "1" & DataType == "R" ~ HLNoAtLngt / (HaulDur / 60),
    SpecVal == "1" & DataType == "S" ~ (HLNoAtLngt * SubFactor) / (HaulDur / 60),
    SpecVal == "zeroCatch" ~ 0,
    TRUE ~ NA_real_
  ))
# Plot number of hauls per year, filled by zero catch or not
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  group_by(Year, zero_catch) %>%
  summarise(n = n()) %>%
  ggplot(., aes(x = Year, y = n, fill = zero_catch)) +
  geom_bar(stat = "identity")
# Some length class x haul combinations have multiple rows (the printout below
# shows these come from e.g. separate Sex = M/F records), so we need to sum
# them up
hlcod0 %>% group_by(LngtClass, haul.id) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)
#> # A tibble: 2 x 1
#> n
#> <int>
#> 1 1
#> 2 2
# Inspect the duplicated rows
hlcod0 %>% group_by(LngtClass, haul.id) %>% mutate(n = n()) %>% ungroup() %>% filter(n == 2) %>% as.data.frame() %>% head(20)
#> X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1 97185 HL BITS 4 EST ESLF TVS NA <NA> NA
#> 2 97186 HL BITS 4 EST ESLF TVS NA <NA> NA
#> 3 99360 HL BITS 4 DEN 26D4 TVL NA R NA
#> 4 99366 HL BITS 4 DEN 26D4 TVL NA R NA
#> 5 100061 HL BITS 4 DEN 26D4 TVL NA R NA
#> 6 100063 HL BITS 4 DEN 26D4 TVL NA R NA
#> 7 100064 HL BITS 4 DEN 26D4 TVL NA R NA
#> 8 100065 HL BITS 4 DEN 26D4 TVL NA R NA
#> 9 100066 HL BITS 4 DEN 26D4 TVL NA R NA
#> 10 100067 HL BITS 4 DEN 26D4 TVL NA R NA
#> 11 100068 HL BITS 4 DEN 26D4 TVL NA R NA
#> 12 100070 HL BITS 4 DEN 26D4 TVL NA R NA
#> 13 100072 HL BITS 4 DEN 26D4 TVL NA R NA
#> 14 100076 HL BITS 4 DEN 26D4 TVL NA R NA
#> 15 100078 HL BITS 4 DEN 26D4 TVL NA R NA
#> 16 100079 HL BITS 4 DEN 26D4 TVL NA R NA
#> 17 100081 HL BITS 4 DEN 26D4 TVL NA R NA
#> 18 100082 HL BITS 4 DEN 26D4 TVL NA R NA
#> 19 100084 HL BITS 4 DEN 26D4 TVL NA R NA
#> 20 100085 HL BITS 4 DEN 26D4 TVL NA R NA
#> StNo HaulNo Year SpecCodeType SpecCode SpecVal Sex TotalNo CatIdentifier
#> 1 14 14 2000 T 164712 1 M 2 1
#> 2 14 14 2000 T 164712 1 F 4 1
#> 3 63 31 2001 T 164712 1 M 5 1
#> 4 63 31 2001 T 164712 1 F 7 1
#> 5 83 37 2001 T 164712 1 M 21 1
#> 6 83 37 2001 T 164712 1 M 21 1
#> 7 83 37 2001 T 164712 1 M 21 1
#> 8 83 37 2001 T 164712 1 M 21 1
#> 9 83 37 2001 T 164712 1 M 21 1
#> 10 83 37 2001 T 164712 1 M 21 1
#> 11 83 37 2001 T 164712 1 M 21 1
#> 12 83 37 2001 T 164712 1 M 21 1
#> 13 83 37 2001 T 164712 1 M 21 1
#> 14 83 37 2001 T 164712 1 F 42 1
#> 15 83 37 2001 T 164712 1 F 42 1
#> 16 83 37 2001 T 164712 1 F 42 1
#> 17 83 37 2001 T 164712 1 F 42 1
#> 18 83 37 2001 T 164712 1 F 42 1
#> 19 83 37 2001 T 164712 1 F 42 1
#> 20 83 37 2001 T 164712 1 F 42 1
#> NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1 3 1 NA 32 1 35 2 <NA>
#> 2 3 1 NA 32 1 35 2 <NA>
#> 3 5 1 NA 3142 1 22 2 <NA>
#> 4 7 1 NA 3142 1 22 1 <NA>
#> 5 21 1 NA 64261 1 22 1 <NA>
#> 6 21 1 NA 64261 1 38 2 <NA>
#> 7 21 1 NA 64261 1 39 2 <NA>
#> 8 21 1 NA 64261 1 41 1 <NA>
#> 9 21 1 NA 64261 1 42 1 <NA>
#> 10 21 1 NA 64261 1 44 2 <NA>
#> 11 21 1 NA 64261 1 45 4 <NA>
#> 12 21 1 NA 64261 1 48 2 <NA>
#> 13 21 1 NA 64261 1 52 1 <NA>
#> 14 42 1 NA 64261 1 22 1 <NA>
#> 15 42 1 NA 64261 1 38 2 <NA>
#> 16 42 1 NA 64261 1 39 2 <NA>
#> 17 42 1 NA 64261 1 41 1 <NA>
#> 18 42 1 NA 64261 1 42 4 <NA>
#> 19 42 1 NA 64261 1 44 3 <NA>
#> 20 42 1 NA 64261 1 45 3 <NA>
#> LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1 NA 20131112 126436 KOOT KOOT
#> 2 NA 20131112 126436 KOOT KOOT
#> 3 NA 20131113 126436 DAN2 DAN2
#> 4 NA 20131113 126436 DAN2 DAN2
#> 5 NA 20131113 126436 DAN2 DAN2
#> 6 NA 20131113 126436 DAN2 DAN2
#> 7 NA 20131113 126436 DAN2 DAN2
#> 8 NA 20131113 126436 DAN2 DAN2
#> 9 NA 20131113 126436 DAN2 DAN2
#> 10 NA 20131113 126436 DAN2 DAN2
#> 11 NA 20131113 126436 DAN2 DAN2
#> 12 NA 20131113 126436 DAN2 DAN2
#> 13 NA 20131113 126436 DAN2 DAN2
#> 14 NA 20131113 126436 DAN2 DAN2
#> 15 NA 20131113 126436 DAN2 DAN2
#> 16 NA 20131113 126436 DAN2 DAN2
#> 17 NA 20131113 126436 DAN2 DAN2
#> 18 NA 20131113 126436 DAN2 DAN2
#> 19 NA 20131113 126436 DAN2 DAN2
#> 20 NA 20131113 126436 DAN2 DAN2
#> IDx SD Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 2000.4.EST.ESLF.TVS.14.14 28 45H1 V 1 1
#> 2 2000.4.EST.ESLF.TVS.14.14 28 45H1 V 1 1
#> 3 2001.4.DEN.26D4.TVL.63.31 26 39G8 V 1 1
#> 4 2001.4.DEN.26D4.TVL.63.31 26 39G8 V 1 1
#> 5 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 6 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 7 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 8 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 9 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 10 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 11 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 12 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 13 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 14 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 15 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 16 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 17 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 18 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 19 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> 20 2001.4.DEN.26D4.TVL.83.37 26 40G8 V 1 1
#> Fishing.line DataType HaulDur GroundSpeed haul.id ShootLat
#> 1 33.22 C 30 3.0 2000:4:EST:KOOT:TVS:14:14 58.0167
#> 2 33.22 C 30 3.0 2000:4:EST:KOOT:TVS:14:14 58.0167
#> 3 63.46 R 30 3.0 2001:4:DEN:DAN2:TVL:63:31 55.4699
#> 4 63.46 R 30 3.0 2001:4:DEN:DAN2:TVL:63:31 55.4699
#> 5 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 6 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 7 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 8 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 9 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 10 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 11 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 12 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 13 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 14 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 15 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 16 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 17 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 18 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 19 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> 20 63.46 R 31 3.1 2001:4:DEN:DAN2:TVL:83:37 55.6627
#> ShootLong Species CPUEun n
#> 1 21.0833 Gadus morhua 2.000000 2
#> 2 21.0833 Gadus morhua 2.000000 2
#> 3 18.3116 Gadus morhua 4.000000 2
#> 4 18.3116 Gadus morhua 2.000000 2
#> 5 18.0414 Gadus morhua 1.935484 2
#> 6 18.0414 Gadus morhua 3.870968 2
#> 7 18.0414 Gadus morhua 3.870968 2
#> 8 18.0414 Gadus morhua 1.935484 2
#> 9 18.0414 Gadus morhua 1.935484 2
#> 10 18.0414 Gadus morhua 3.870968 2
#> 11 18.0414 Gadus morhua 7.741935 2
#> 12 18.0414 Gadus morhua 3.870968 2
#> 13 18.0414 Gadus morhua 1.935484 2
#> 14 18.0414 Gadus morhua 1.935484 2
#> 15 18.0414 Gadus morhua 3.870968 2
#> 16 18.0414 Gadus morhua 3.870968 2
#> 17 18.0414 Gadus morhua 1.935484 2
#> 18 18.0414 Gadus morhua 7.741935 2
#> 19 18.0414 Gadus morhua 5.806452 2
#> 20 18.0414 Gadus morhua 5.806452 2
# Keep a haul with duplicated length-class rows for eyeballing the fix below
test <- hlcod0 %>% group_by(LngtClass, haul.id) %>% mutate(n = n()) %>% ungroup() %>% filter(n == 2)
test_id <- test$haul.id[2]
# Sum CPUE within haul x length class (collapses the sex duplicates), then
# keep one row per haul x length class.
# NOTE: distinct(.keep_all = TRUE) keeps the FIRST row's values for all other
# per-row columns (Sex, HLNoAtLngt, ...), so only CPUEun is reliable after
# this step.
hlcodL <- hlcod0 %>%
group_by(LngtClass, haul.id) %>%
mutate(CPUEun = sum(CPUEun)) %>%
ungroup() %>%
mutate(id3 = paste(haul.id, LngtClass)) %>%
distinct(id3, .keep_all = TRUE) %>%
dplyr::select(-X, -id3) # Clean up a bit (X is the read.csv row-number column)
# Check with an ID
# filter(hlcod0, haul.id == test_id)
# filter(hlcodL, haul.id == test_id) %>% as.data.frame()
# Do we still have 0 catches after the aggregation?
hlcodL %>%
group_by(haul.id, Year) %>%
summarise(CPUEun_haul = sum(CPUEun)) %>%
ungroup() %>%
mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
group_by(Year, zero_catch) %>%
summarise(n = n()) %>%
ggplot(., aes(x = Year, y = n, fill = zero_catch)) +
geom_bar(stat = "identity")
# Flounder
# CPUE in numbers per hour by DataType (C: as is; R: per hour; S: scale by
# SubFactor then per hour; "zeroCatch": 0; anything else NA). case_when
# replaces the nested ifelse() — same values, consistent with the cod block.
hlfle0 <- hlfle0 %>%
  mutate(CPUEun = case_when(
    SpecVal == "1" & DataType == "C" ~ HLNoAtLngt,
    SpecVal == "1" & DataType == "R" ~ HLNoAtLngt / (HaulDur / 60),
    SpecVal == "1" & DataType == "S" ~ (HLNoAtLngt * SubFactor) / (HaulDur / 60),
    SpecVal == "zeroCatch" ~ 0,
    TRUE ~ NA_real_
  ))
# Sum up the CPUEs if multiple rows per length class and haul, then keep one
# row per haul x length class (other per-row columns keep the first row's
# values and are no longer reliable)
hlfleL <- hlfle0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>%
  mutate(id3 = paste(haul.id, LngtClass)) %>%
  distinct(id3, .keep_all = TRUE) %>%
  dplyr::select(-X, -id3)
# Cod
bits_ca_cod <- bits_ca %>%
  filter(SpecCode %in% c("164712", "126436") & Year < 2020) %>%
  mutate(StNo = as.numeric(StNo)) %>% # some StNo are non-numeric -> NA (the coercion warning is expected)
  mutate(Species = "Cod") %>%
  mutate(ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
# Expand to one row per individual by repeating each row CANoAtLngt times.
# tidyr::uncount() does this directly and more readably than the previous
# map_df(., rep, .$CANoAtLngt) trick; .remove = FALSE keeps the CANoAtLngt
# column (as the original did).
bits_ca_cod <- bits_ca_cod %>% uncount(CANoAtLngt, .remove = FALSE)
# Small test kept for reference:
# test_id <- head(filter(bits_ca_cod, CANoAtLngt == 5))$ID[1]
# head(data.frame(filter(bits_ca_cod, ID == test_id & CANoAtLngt == 5)), 20)
# nrow(bits_ca_cod)
# Looks ok!
# Standardize length and drop NA weights (need individual weights for the
# length-weight regressions / condition)
bits_ca_cod <- bits_ca_cod %>%
drop_na(IndWgt) %>%
drop_na(LngtClass) %>%
filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
mutate(weight_kg = IndWgt/1000) %>% # IndWgt is in grams
mutate(length_cm = ifelse(LngtCode == ".",
LngtClass/10,
LngtClass)) # Standardize length: LngtCode "." rows are in mm, hence /10 (https://vocab.ices.dk/?ref=18)
# Plot weight vs length by year as a sanity check
ggplot(bits_ca_cod, aes(IndWgt, length_cm)) +
geom_point() +
facet_wrap(~Year)
# Now extract the annual length-weight regression coefficients (W = a*L^b,
# fitted as log(W) = log(a) + b*log(L)), not bothering with outliers at the
# moment. Fit the per-year models ONCE and pull both the intercept (a) and
# slope (b) from the same fits, instead of refitting every model twice as the
# original code did.
cod_lw_fits <- bits_ca_cod %>%
  split(.$Year) %>%
  purrr::map(~lm(log(IndWgt) ~ log(length_cm), data = .x)) %>%
  purrr::map_df(broom::tidy, .id = 'Year')
cod_intercept <- cod_lw_fits %>%
  filter(term == "(Intercept)") %>%
  mutate(a = exp(estimate)) %>% # back-transform the intercept to the multiplicative scale
  mutate(Year = as.integer(Year)) %>%
  dplyr::select(Year, a)
cod_slope <- cod_lw_fits %>%
  filter(term == "log(length_cm)") %>%
  mutate(Year = as.integer(Year)) %>%
  rename("b" = "estimate") %>%
  dplyr::select(Year, b)
# Flounder
bits_ca_fle <- bits_ca %>%
  filter(SpecCode %in% c("127141", "172894") & Year < 2020) %>%
  mutate(StNo = as.numeric(StNo)) %>% # some StNo are non-numeric -> NA (the coercion warning is expected)
  mutate(Species = "Flounder") %>%
  mutate(ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
# Expand to one row per individual (repeat each row CANoAtLngt times);
# uncount() replaces the previous map_df(., rep, .$CANoAtLngt) trick and
# .remove = FALSE keeps the CANoAtLngt column, as before
bits_ca_fle <- bits_ca_fle %>% uncount(CANoAtLngt, .remove = FALSE)
# Standardize length and drop NA weights (need individual weights for the
# length-weight regressions / condition)
bits_ca_fle <- bits_ca_fle %>%
drop_na(IndWgt) %>%
drop_na(LngtClass) %>%
filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
mutate(weight_kg = IndWgt/1000) %>% # IndWgt is in grams
mutate(length_cm = ifelse(LngtCode == ".",
LngtClass/10,
LngtClass)) %>% # Standardize length: LngtCode "." rows are in mm, hence /10 (https://vocab.ices.dk/?ref=18)
mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>% # drop the 2008 mm-coded rows (presumably suspect -- TODO confirm why)
filter(keep == "Y") %>%
filter(length_cm < 70) # drop implausibly large flounder (presumably outliers -- TODO confirm threshold)
# Plot weight vs length by year, colored by length code, as a sanity check
ggplot(bits_ca_fle, aes(IndWgt, length_cm, color = LngtCode)) +
geom_point() +
facet_wrap(~Year)
# Now extract the annual length-weight regression coefficients (W = a*L^b) for
# flounder, not bothering with outliers at the moment. As for cod, fit the
# per-year models ONCE and pull both the intercept (a) and slope (b) from the
# same fits, instead of refitting every model twice.
fle_lw_fits <- bits_ca_fle %>%
  split(.$Year) %>%
  purrr::map(~lm(log(IndWgt) ~ log(length_cm), data = .x)) %>%
  purrr::map_df(broom::tidy, .id = 'Year')
fle_intercept <- fle_lw_fits %>%
  filter(term == "(Intercept)") %>%
  mutate(a = exp(estimate)) %>% # back-transform the intercept to the multiplicative scale
  mutate(Year = as.integer(Year)) %>%
  dplyr::select(Year, a)
fle_slope <- fle_lw_fits %>%
  filter(term == "log(length_cm)") %>%
  mutate(Year = as.integer(Year)) %>%
  rename("b" = "estimate") %>%
  dplyr::select(Year, b)
# These are the haul-data
# hlcodL
# hlfleL
# Attach the annual length-weight coefficients (a, b) to the haul-level data
hlcodL <- left_join(hlcodL, cod_intercept, by = "Year")
hlcodL <- left_join(hlcodL, cod_slope, by = "Year")
hlfleL <- left_join(hlfleL, fle_intercept, by = "Year")
hlfleL <- left_join(hlfleL, fle_slope, by = "Year")
# First standardize length to cm and then check how zero-catches are implemented at this stage
# NOTE(review): here only LngtCode "." is treated as mm, while the flounder
# version further down also treats "0" as mm. Confirm whether cod has any
# LngtCode == "0" rows; if so, those lengths would wrongly stay in mm here.
hlcodL <- hlcodL %>%
mutate(length_cm = ifelse(LngtCode == ".",
LngtClass/10,
LngtClass)) # Standardize length ((https://vocab.ices.dk/?ref=18))
filter(hlcodL, length_cm == 0) # No such thing
#> # A tibble: 0 x 49
#> # … with 49 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> # Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> # DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> # SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> # CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> # CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> # DevStage <chr>, LenMeasType <int>, DateofCalculation <int>,
#> # Valid_Aphia <int>, Ship2 <chr>, Ship3 <chr>, IDx <chr>, SD <chr>,
#> # Rect <chr>, HaulVal <chr>, StdSpecRecCode <int>, BySpecRecCode <int>,
#> # Fishing.line <dbl>, DataType <chr>, HaulDur <int>, GroundSpeed <dbl>,
#> # haul.id <chr>, ShootLat <dbl>, ShootLong <dbl>, Species <chr>,
#> # CPUEun <dbl>, a <dbl>, b <dbl>, length_cm <dbl>
# Now check if all rows where length is NA are the ones with zero catch!
hlcodL %>%
mutate(length2 = replace_na(length_cm, -9),
no_length = ifelse(length2 < 0, "T", "F")) %>%
ggplot(., aes(length2, CPUEun, color = no_length)) + geom_point(alpha = 0.2) + facet_wrap(~no_length)
# Right, so all hauls with zero catch have NA length_cm. I don't have any NA catches
# (throwaway checks; `t` is overwritten each time and not used afterwards)
t <- hlcodL %>% drop_na(CPUEun)
t <- hlcodL %>% filter(CPUEun == 0)
t <- hlcodL %>% drop_na(length_cm)
# In other words, a zero catch is when the catch is zero and length_cm is NA
# In order to not get any NA CPUEs in unit biomass because length is NA (I want them instead
# to be 0, as the numbers-CPUE is), I will replace length_cm == NA with length_cm == 0 before
# calculating biomass cpue
hlcodL <- hlcodL %>% mutate(length_cm2 = replace_na(length_cm, 0))
# Calculate weight per individual and biomass CPUE:
# weight_kg = (a * L^b)/1000 (a, b from the annual log-log fits on IndWgt in
# grams, hence /1000). For zero-catch rows length_cm2 = 0 so, assuming b > 0,
# weight_kg = 0 and CPUEun_kg = 0, as intended.
hlcodL <- hlcodL %>%
mutate(weight_kg = (a*length_cm2^b)/1000) %>%
mutate(CPUEun_kg = weight_kg*CPUEun)
# Plot and check it's correct also in this data
ggplot(hlcodL, aes(weight_kg, length_cm2)) +
geom_point() +
facet_wrap(~Year)
# Now do the same for flounder
# First standardize length to cm and then check how zero-catches are implemented at this stage
# Note: unlike the cod version above, LngtCode "0" is also treated as mm here
hlfleL <- hlfleL %>%
mutate(length_cm = ifelse(LngtCode %in% c(".", "0"),
LngtClass/10,
LngtClass)) # Standardize length (https://vocab.ices.dk/?ref=18)
filter(hlfleL, length_cm == 0) # No such thing
#> # A tibble: 0 x 49
#> # … with 49 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> # Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> # DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> # SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> # CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> # CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> # DevStage <chr>, LenMeasType <int>, DateofCalculation <int>,
#> # Valid_Aphia <int>, Ship2 <chr>, Ship3 <chr>, IDx <chr>, SD <chr>,
#> # Rect <chr>, HaulVal <chr>, StdSpecRecCode <int>, BySpecRecCode <int>,
#> # Fishing.line <dbl>, DataType <chr>, HaulDur <int>, GroundSpeed <dbl>,
#> # haul.id <chr>, ShootLat <dbl>, ShootLong <dbl>, Species <chr>,
#> # CPUEun <dbl>, a <dbl>, b <dbl>, length_cm <dbl>
# NOTE(review): a verbatim copy of the bits_ca_fle cleaning pipeline (drop NA
# weights/lengths, keep positives, standardize length, drop the 2008 "." rows,
# keep < 70 cm) appeared here a second time. It re-ran the exact same,
# idempotent steps on already-clean data (bits_ca_fle is not modified between
# the two occurrences), so the duplicate has been removed.
# Now check if all rows where length is NA are the ones with zero catch!
hlfleL %>%
mutate(length2 = replace_na(length_cm, -9),
no_length = ifelse(length2 < 0, "T", "F")) %>%
ggplot(., aes(length2, CPUEun, color = no_length)) + geom_point(alpha = 0.2) + facet_wrap(~no_length)
#> Warning: Removed 11 rows containing missing values (geom_point).
# The warning above flags 11 rows with NA CPUE; check which lengths they have
hlfleL %>% mutate(length2 = replace_na(length_cm, -9)) %>% group_by(length2) %>% distinct(CPUEun) %>% arrange(CPUEun)
#> # A tibble: 5,356 x 2
#> # Groups: length2 [210]
#> CPUEun length2
#> <dbl> <dbl>
#> 1 0 -9
#> 2 1 22
#> 3 1 24
#> 4 1 27
#> 5 1 29
#> 6 1 34
#> 7 1 37
#> 8 1 16
#> 9 1 17
#> 10 1 38
#> # … with 5,346 more rows
# Right, so all hauls with zero catch have NA length_cm. I don't have any NA catches
t <- hlfleL %>% drop_na(CPUEun)
# Well, 11 rows with NA CPUE (cf. the warning above). I will remove them
hlfleL <- hlfleL %>% drop_na(CPUEun)
# Throwaway checks; `t` is overwritten each time and not used afterwards
t <- hlfleL %>% filter(CPUEun == 0)
t <- hlfleL %>% drop_na(length_cm)
# In other words, a zero catch is when the catch is zero and length_cm is NA
# In order to not get any NA CPUEs in unit biomass because length is NA (I want them instead
# to be 0, as the numbers-CPUE is), I will replace length_cm == NA with length_cm == 0 before
# calculating biomass cpue
hlfleL <- hlfleL %>% mutate(length_cm2 = replace_na(length_cm, 0))
# Calculate weight per individual (W = a*L^b, grams -> kg) and biomass CPUE
hlfleL <- hlfleL %>%
mutate(weight_kg = (a*length_cm2^b)/1000) %>%
mutate(CPUEun_kg = weight_kg*CPUEun)
# Plot and check it's correct also in this data
ggplot(hlfleL, aes(weight_kg, length_cm2)) +
geom_point() +
facet_wrap(~Year)
#> Warning: Removed 344 rows containing missing values (geom_point).
# Check
t <- hlfleL %>% drop_na(CPUEun_kg) # Should not have any NA in biomass-catch
t <- hlfleL %>% filter(CPUEun_kg == 0) # Should result in a few percent of rows (note this is not proportion of hauls, but rows)
t <- hlfleL %>% drop_na(length_cm2) # Should be no NA
# What is the proportion of zero-catch hauls per year?
# One helper replaces the two copy-pasted cod/flounder pipelines.
# FIX: values_fill = 0 so that a year missing one category (e.g. no zero-catch
# hauls) gets a proportion of 0 or 1 instead of NA; the NA rows were the cause
# of the "Removed 1 rows containing missing values (position_stack)" warnings.
prop_zero_catch_plot <- function(df, title) {
  df %>%
    group_by(haul.id, Year) %>%
    summarise(CPUEun_haul = sum(CPUEun)) %>%
    ungroup() %>%
    mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
    group_by(Year, zero_catch) %>%
    summarise(n = n()) %>%
    ungroup() %>%
    pivot_wider(names_from = zero_catch, values_from = n,
                values_fill = list(n = 0)) %>%
    mutate(prop_zero_catch_hauls = Y/(N+Y)) %>%
    ggplot(., aes(Year, prop_zero_catch_hauls)) + geom_bar(stat = "identity") +
    coord_cartesian(expand = 0, ylim = c(0, 1)) +
    ggtitle(title)
}
cod_0plot <- prop_zero_catch_plot(hlcodL, "Cod")
fle_0plot <- prop_zero_catch_plot(hlfleL, "Flounder")
# Stack the two panels with patchwork
cod_0plot / fle_0plot
# To get unit: kg of fish caught by trawling for 1 h a standard bottom swept area of 0.45 km^2 using a TVL trawl with 75 m sweeps at the standard speed of three knots
# Remove hauls done with the TVL gear with a SweepLngt < 50 (these are
# calibration hauls, pers. com. Anders & Ale), and remove pelagic-gear hauls.
# Shared helper (the cod and flounder pipelines were identical). Treating NA
# SweepLngt as 50 keeps hauls with unknown sweep length, as the original
# SweepLngt2/keep construction did.
drop_nonstandard_hauls <- function(d) {
  d %>%
    filter(!(Gear == "TVL" & replace_na(SweepLngt, 50) < 50)) %>%
    filter(!Gear == "PEL")
}
hlcodL <- drop_nonstandard_hauls(hlcodL)
hlfleL <- drop_nonstandard_hauls(hlfleL)
# Add in RS and RSA-values from the sweep file.
# CPUE should be multiplied with RS and RSA to standardize to a relative
# speed and gear dimension. There is not a single file with all RS and RSA
# values; they come from:
# - sweep (non-Swedish hauls between 1991-2016)
# - + calculated below based on trawl speed and gear dimensions.
# I join in the RS and RSA values from all sources, then standardize and
# filter away non-standardized hauls.
# sort(unique(sweep$Year))
# sort(unique(sweep$Country))
# Since the sweep data does not cover Swedish hauls, those are calculated
# from scratch further down using the equation in Orio's spreadsheet.
# NOTE: "ï..haul.id" is a BOM artifact (UTF-8-with-BOM csv read on Windows);
# rename it back to haul.id before selecting.
sweep_sel <- sweep %>%
  rename("haul.id" = "ï..haul.id") %>%
  dplyr::select(haul.id, RSA, RS)
# Shared helper (the cod and flounder pipelines were identical): join the
# sweep-file factors by haul.id and coerce them to numeric.
join_sweep_factors <- function(d) {
  left_join(d, sweep_sel) %>%
    rename("RS_sweep" = "RS",
           "RSA_sweep" = "RSA") %>%
    mutate(RS_sweep = as.numeric(RS_sweep),
           RSA_sweep = as.numeric(RSA_sweep))
}
hlcodL2 <- join_sweep_factors(hlcodL)
hlfleL2 <- join_sweep_factors(hlfleL)
hlcodL2 %>% colnames() %>% sort() # check columns after the sweep join
#> [1] "a" "b" "BySpecRecCode"
#> [4] "CatCatchWgt" "CatIdentifier" "Country"
#> [7] "CPUEun" "CPUEun_kg" "DataType"
#> [10] "DateofCalculation" "DevStage" "DoorType"
#> [13] "Fishing.line" "Gear" "GearEx"
#> [16] "GroundSpeed" "haul.id" "HaulDur"
#> [19] "HaulNo" "HaulVal" "HLNoAtLngt"
#> [22] "IDx" "length_cm" "length_cm2"
#> [25] "LenMeasType" "LngtClass" "LngtCode"
#> [28] "NoMeas" "Quarter" "RecordType"
#> [31] "Rect" "RS_sweep" "RSA_sweep"
#> [34] "SD" "Sex" "Ship"
#> [37] "Ship2" "Ship3" "ShootLat"
#> [40] "ShootLong" "SpecCode" "SpecCodeType"
#> [43] "Species" "SpecVal" "StdSpecRecCode"
#> [46] "StNo" "SubFactor" "SubWgt"
#> [49] "Survey" "SweepLngt" "TotalNo"
#> [52] "Valid_Aphia" "weight_kg" "Year"
hlfleL2 %>% colnames() %>% sort() # check columns after the sweep join
#> [1] "a" "b" "BySpecRecCode"
#> [4] "CatCatchWgt" "CatIdentifier" "Country"
#> [7] "CPUEun" "CPUEun_kg" "DataType"
#> [10] "DateofCalculation" "DevStage" "DoorType"
#> [13] "Fishing.line" "Gear" "GearEx"
#> [16] "GroundSpeed" "haul.id" "HaulDur"
#> [19] "HaulNo" "HaulVal" "HLNoAtLngt"
#> [22] "IDx" "length_cm" "length_cm2"
#> [25] "LenMeasType" "LngtClass" "LngtCode"
#> [28] "NoMeas" "Quarter" "RecordType"
#> [31] "Rect" "RS_sweep" "RSA_sweep"
#> [34] "SD" "Sex" "Ship"
#> [37] "Ship2" "Ship3" "ShootLat"
#> [40] "ShootLong" "SpecCode" "SpecCodeType"
#> [43] "Species" "SpecVal" "StdSpecRecCode"
#> [46] "StNo" "SubFactor" "SubWgt"
#> [49] "Survey" "SweepLngt" "TotalNo"
#> [52] "Valid_Aphia" "weight_kg" "Year"
# I will calculate a RS and RSA column in the catch data based on Ale's
# equation in the sweep file. First inspect the input columns (-9 = missing):
hlcodL2$GroundSpeed %>% unique() %>% sort()
#> [1] -9.0 0.8 1.7 1.8 2.0 2.1 2.2 2.4 2.5 2.6 2.7 2.8 2.9 3.0 3.1
#> [16] 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 4.2 4.3 4.4 4.5 4.6
#> [31] 4.7 4.9 5.2 5.3 5.4 5.6 5.7 5.9 6.0 6.2 6.3 6.7 6.9 7.1 7.3
hlcodL2$Fishing.line %>% unique() %>% sort()
#> [1] -9.00 28.00 33.22 36.00 63.46 83.00 160.00
hlcodL2$SweepLngt %>% unique() %>% sort()
#> [1] 40 50 75 87 95 100 180 182 185 188 203 225
# First replace -9 (ICES missing-value code) with NA in the columns used for
# the swept-area calculations, so missing values cannot masquerade as real
# numbers. na_if() + across() replaces six copy-pasted ifelse() calls (which
# also strip attributes).
recode_missing <- function(d) {
  d %>%
    mutate(across(c(GroundSpeed, Fishing.line, SweepLngt), ~ na_if(.x, -9)))
}
hlcodL2 <- recode_missing(hlcodL2)
hlfleL2 <- recode_missing(hlfleL2)
# Now calculate correction factors (following Orio et al. 2017):
#   RS_x  = relative speed: standard 3 knots / actual ground speed
#   RSA_x = relative swept area: standard 0.4539 km2 / actual swept area
# Swept area assumes a 15-degree sweep angle (sin(15 deg) = 0.258819045),
# horizontal opening = 0.67 * fishing line, and 3 knots for 1 h = 3*1860 m.
# Shared helper replaces the identical cod/flounder mutate blocks.
add_correction_factors <- function(d) {
  d %>% mutate(RS_x = 3/GroundSpeed,
               Horizontal.opening..m. = Fishing.line*0.67,
               Swep.one.side..after.formula...meter = 0.258819045*SweepLngt, # SIN(RADIANS(15))
               Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter*2),
               Swept.area = (Size.final..m*3*1860)/1000000,
               RSA_x = 0.45388309675081/Swept.area)
}
hlcodL2 <- add_correction_factors(hlcodL2)
hlfleL2 <- add_correction_factors(hlfleL2)
# Check the equation is correct by recalculating it in the sweep file and
# comparing against the RSA values entered there (points on the 1:1 line)
sweep <- sweep %>% mutate(Horizontal.opening..m.2 = Fishing.line*0.67,
                          Swep.one.side..after.formula...meter2 = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                          Size.final..m2 = Horizontal.opening..m.2 + (Swep.one.side..after.formula...meter2*2),
                          Swept.area2 = (Size.final..m2*3*1860)/1000000,
                          RSA_x = 0.45388309675081/Swept.area2)
sweep %>%
  drop_na() %>%
  ggplot(., aes(as.numeric(RSA), RSA_x)) + geom_point() + geom_abline(intercept = 0, slope = 1)
# Replace NA correction factors with -1/3 (ICES codes missing values as -9,
# which yields -1/3 in the calculation above), so missing factors can be
# filtered out as non-positive later. across() replaces eight copy-pasted
# base assignments.
flag_missing_factors <- function(d) {
  d %>%
    mutate(across(c(RS_x, RS_sweep, RSA_x, RSA_sweep), ~ replace_na(.x, -1/3)))
}
hlcodL2 <- flag_missing_factors(hlcodL2)
hlfleL2 <- flag_missing_factors(hlfleL2)
# Compare the two sources of correction factors (calculated here vs imported
# from the sweep file) - cod data first
p1 <- hlcodL2 %>% filter(RS_x > 0) %>% ggplot(aes(RS_x)) + geom_histogram() + xlim(0.4, 1.76)
p2 <- hlcodL2 %>% ggplot(aes(RSA_x)) + geom_histogram()
p3 <- hlcodL2 %>% ggplot(aes(RS_sweep)) + geom_histogram() + xlim(0.4, 1.76)
p4 <- hlcodL2 %>% ggplot(aes(RSA_sweep)) + geom_histogram()
(p1 + p2) / (p3 + p4)
# Same comparison for flounder
p5 <- hlfleL2 %>% filter(RS_x > 0) %>% ggplot(aes(RS_x)) + geom_histogram() + xlim(0.4, 1.76)
p6 <- hlfleL2 %>% ggplot(aes(RSA_x)) + geom_histogram()
p7 <- hlfleL2 %>% ggplot(aes(RS_sweep)) + geom_histogram() + xlim(0.4, 1.76)
p8 <- hlfleL2 %>% ggplot(aes(RSA_sweep)) + geom_histogram()
(p5 + p6) / (p7 + p8)
# Why do I have RSA values smaller than one? (either because sweep length is
# longer or the gear is larger (GOV)). The equation itself checked out above.
# Which haul IDs have 0 < RSA < 1?
hlcodL2 %>%
  filter(RSA_x < 1 & RSA_x > 0) %>%
  dplyr::select(Year, Country, Ship, Gear, haul.id, Horizontal.opening..m., Fishing.line,
                Swep.one.side..after.formula...meter, SweepLngt, Size.final..m, Swept.area, RSA_x) %>%
  ggplot(aes(RSA_x, fill = factor(SweepLngt))) + geom_histogram() + facet_wrap(~Gear, ncol = 1)
# Do calculated and sweep-file factors agree where both exist?
# (filter positive in both columns; points should fall on the red 1:1 line)
hlcodL2 %>%
  filter(RS_x > 0, RS_sweep > 0) %>%
  ggplot(aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")
hlcodL2 %>%
  filter(RSA_x > 0, RSA_sweep > 0) %>%
  ggplot(aes(RSA_x, RSA_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")
hlfleL2 %>%
  filter(RS_x > 0, RS_sweep > 0) %>%
  ggplot(aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")
hlfleL2 %>%
  filter(RSA_x > 0, RSA_sweep > 0) %>%
  ggplot(aes(RSA_x, RSA_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")
# One odd RS_x is larger than 3: zero catch and a ground speed of 0.8 knots.
# Remove that haul.
hlcodL2 <- filter(hlcodL2, RS_x < 3)
hlfleL2 <- filter(hlfleL2, RS_x < 3)
# Plot again
hlcodL2 %>%
  filter(RS_x > 0, RS_sweep > 0) %>%
  ggplot(aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")
hlfleL2 %>%
  filter(RS_x > 0, RS_sweep > 0) %>%
  ggplot(aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")
# They are the same when they overlap, so it does not matter which data
# source I use for the RS and RSA values
# Make a single RS and RSA column, preferring the sweep-file value and
# falling back on the calculated one; hauls where neither is positive are
# dropped. The original sentinel construction (-99 + nested ifelse) is
# collapsed: RS_x/RSA_x contain no NAs (missing ones were set to -1/3), so
# if_else(RS_sweep > 0, RS_sweep, RS_x) followed by filter(RS > 0) is exactly
# equivalent. This also makes the cod logic consistent with flounder's.
# Cod
hlcodL3 <- hlcodL2 %>%
  mutate(RS = if_else(RS_sweep > 0, RS_sweep, RS_x),
         RSA = if_else(RSA_sweep > 0, RSA_sweep, RSA_x)) %>%
  filter(RS > 0) %>%
  filter(RSA > 0) %>%
  mutate(RSRSA = RS*RSA)
# Plot the combined correction factor
ggplot(hlcodL3, aes(RSRSA)) + geom_histogram()
# Which years of Latvian (LAT) flounder data exist before the RS/RSA
# filtering? (Compared below against what survives standardization.)
hlfleL2 %>% filter(Country == "LAT") %>% distinct(Year) %>% arrange(Year)
#> # A tibble: 22 x 1
#> Year
#> <int>
#> 1 1997
#> 2 1999
#> 3 2000
#> 4 2001
#> 5 2002
#> 6 2003
#> 7 2004
#> 8 2005
#> 9 2006
#> 10 2007
#> # … with 12 more rows
# Flounder: same consolidation as for cod - prefer the sweep-file factor,
# fall back on the calculated one, drop hauls where neither is positive.
# (The original used a -999 sentinel with nested ifelse(); since RS_x/RSA_x
# contain no NAs this simplifies to if_else + the positive filter.)
hlfleL3 <- hlfleL2 %>%
  mutate(RS = if_else(RS_sweep > 0, RS_sweep, RS_x),
         RSA = if_else(RSA_sweep > 0, RSA_sweep, RSA_x)) %>%
  filter(RS > 0) %>%
  filter(RSA > 0) %>%
  mutate(RSRSA = RS*RSA)
# Test how many years of LAT data I miss out on because I can't standardize it.
# hlfleL2 %>%
# mutate(RS = -999,
# RS = ifelse(RS_sweep > 0, RS_sweep, RS),
# RS = ifelse(RS < 0, RS_x, RS)) %>% # Note that there are no NA i RS_x. This replaces all non-RS_sweep values -0.3, so I can filter positive later
# filter(RS > 0) %>%
# filter(Country == "LAT") %>%
# distinct(Year) %>%
# arrange(Year)
#
# hlfleL2 %>%
# mutate(RSA = -999,
# RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA),
# RSA = ifelse(RSA < 0, RSA_x, RSA)) %>%
# filter(RSA > 0) %>%
# filter(Country == "LAT") %>%
# distinct(Year) %>%
# arrange(Year)
# Plot the combined correction factor for flounder.
# (The original re-plotted hlcodL3 here, duplicating the identical cod plot
# above - presumably a copy-paste slip, since this follows the flounder
# standardization.)
ggplot(hlfleL3, aes(RSRSA)) + geom_histogram()
# Standardize! CPUE * RS * RSA gives catch per standard speed and swept
# area (TVL trawl, 75 m sweeps, 3 knots, 1 h = 0.4539 km2).
# Shared helper replaces the identical cod/flounder mutate blocks.
standardize_cpue <- function(d) {
  d %>%
    mutate(CPUEst_kg = CPUEun_kg*RS*RSA,
           CPUEst = CPUEun*RS*RSA)
}
hlcodL3 <- standardize_cpue(hlcodL3)
hlfleL3 <- standardize_cpue(hlfleL3)
# Checks: no NAs and non-negative catches in the standardized columns
unique(is.na(hlcodL3$CPUEst_kg))
unique(is.na(hlcodL3$CPUEst))
min(hlcodL3$CPUEst_kg)
min(hlcodL3$CPUEst)
unique(is.na(hlfleL3$CPUEst_kg)) # Remove the few NA's here
hlfleL3 <- hlfleL3 %>% drop_na(CPUEst_kg)
unique(is.na(hlfleL3$CPUEst))
min(hlfleL3$CPUEst_kg)
min(hlfleL3$CPUEst)
# Now calculate total CPUE per haul in Orio's unit, then convert to the new
# unit: from kg caught trawling 1 h over a standard swept area of 0.45 km2
# to kg per km2, by dividing by 0.45.
# group_by + mutate + distinct(.keep_all = TRUE) keeps one row per haul
# while retaining the other haul-level columns (summarise would drop them).
# Shared helper replaces the identical cod/flounder pipelines.
sum_per_haul <- function(d) {
  d %>%
    group_by(haul.id) %>%
    mutate(haul_cpue_kg = sum(CPUEst_kg),
           haul_cpue = sum(CPUEst),
           haul_cpue_kg_un = sum(CPUEun_kg),
           haul_cpue_un = sum(CPUEun),
           density = haul_cpue_kg/0.45,
           density_ab = haul_cpue/0.45) %>%
    ungroup() %>%
    distinct(haul.id, .keep_all = TRUE)
}
hlcodhaul <- sum_per_haul(hlcodL3)
hlflehaul <- sum_per_haul(hlfleL3)
# t <- hlcodhaul %>% filter(haul_cpue_un == 0)
# t <- hlcodhaul %>% filter(!Country == "SWE") %>% filter(haul_cpue_un > 0)
# Rename to modelling-friendly names and keep only the columns needed
dat <- hlcodhaul %>%
  rename(year = Year,
         lat = ShootLat,
         lon = ShootLong,
         quarter = Quarter,
         ices_rect = Rect) %>%
  dplyr::select(density, year, lat, lon, quarter, haul.id, IDx, ices_rect, SD)
# Join flounder density onto the cod data by haul ID; hauls without a
# matching flounder record are dropped
hlflehaul_select <- hlflehaul %>%
  dplyr::select(density_fle = density, haul.id)
dat <- dat %>%
  left_join(hlflehaul_select, by = "haul.id") %>%
  drop_na(density_fle)
dat %>% arrange(desc(density_fle)) %>% head(50) %>% data.frame() # eyeball the 50 largest flounder densities
#> density year lat lon quarter haul.id
#> 1 75.853482 2010 57.3617 21.2350 4 2010:4:LAT:BALL:TVL:2:6
#> 2 94.385161 2007 57.2124 18.8491 4 2007:4:SWE:ARG:TVL:689:14
#> 3 265.278057 2012 57.4600 21.2450 4 2012:4:LAT:BALL:TVL:2:6
#> 4 145.764831 2019 54.4500 19.3283 4 2019:4:POL:BAL:TVL:26256:4
#> 5 36.249608 2009 57.8994 19.4332 4 2009:4:SWE:ARG:TVL:723:11
#> 6 109.185255 2012 57.3683 21.2533 4 2012:4:LAT:BALL:TVL:2:7
#> 7 1843.781093 2001 56.4709 18.6149 4 2001:4:DEN:DAN2:TVL:93:38
#> 8 868.595807 2019 55.4627 14.5290 4 2019:4:SWE:77SE:TVL:66:2
#> 9 177.603041 2008 57.1766 18.8081 4 2008:4:SWE:ARG:TVL:697:12
#> 10 468.959595 2011 57.2433 21.0817 4 2011:4:LAT:BALL:TVL:2:11
#> 11 312.027608 2008 57.4733 21.1167 4 2008:4:LAT:BALL:TVL:2:3
#> 12 9.468183 2013 57.7022 19.1408 4 2013:4:SWE:DAN2:TVL:33:17
#> 13 1614.959782 2007 57.2033 20.7167 4 2007:4:LAT:BALL:TVL:2:9
#> 14 266.250565 2013 57.8742 19.4317 4 2013:4:SWE:DAN2:TVL:30:15
#> 15 63.431536 2008 57.1973 18.8309 4 2008:4:SWE:ARG:TVL:710:16
#> 16 181.205759 2017 57.3583 21.2600 4 2017:4:LAT:BALL:TVL:2:2
#> 17 366.600070 2008 57.1658 18.8226 4 2008:4:SWE:ARG:TVL:696:11
#> 18 1410.470804 2002 56.0901 20.5504 4 2002:4:DEN:DAN2:TVL:29:14
#> 19 491.714968 2007 56.6283 20.5817 4 2007:4:LAT:BALL:TVL:2:15
#> 20 67.401173 2011 55.1000 20.1083 4 2011:4:RUS:ATL:TVL:62:2
#> 21 248.422144 2007 57.1698 18.8370 4 2007:4:SWE:ARG:TVL:687:12
#> 22 226.040831 2017 54.5167 18.9117 4 2017:4:POL:BAL:TVL:26280:41
#> 23 312.968144 2006 56.6850 20.7667 4 2006:4:LAT:BALL:TVL:2:14
#> 24 177.801499 2010 57.3233 21.0750 4 2010:4:LAT:BALL:TVL:2:5
#> 25 145.904915 2011 57.0200 20.9850 4 2011:4:LAT:BALL:TVL:2:12
#> 26 257.081364 2018 54.4150 19.3050 4 2018:4:POL:BAL:TVL:26163:11
#> 27 11.547042 2004 57.8733 19.4150 4 2004:4:SWE:ARG:TVL:726:5
#> 28 26.529045 2000 57.1790 18.8753 4 2000:4:SWE:ARG:GOV:606:38
#> 29 1319.885682 2008 55.8033 20.0700 4 2008:4:LAT:BALL:TVL:2:26
#> 30 93.681053 2006 57.2300 21.1567 4 2006:4:LAT:BALL:TVL:2:9
#> 31 1237.927169 2007 57.0367 20.7783 4 2007:4:LAT:BALL:TVL:2:11
#> 32 471.672180 2008 54.3950 18.9850 4 2008:4:POL:BAL:TVL:26219:20
#> 33 416.405620 2006 56.6650 20.7817 4 2006:4:LAT:BALL:TVL:2:15
#> 34 506.792401 2008 56.9983 20.7650 4 2008:4:LAT:BALL:TVL:2:12
#> 35 366.748811 2001 57.3633 16.9213 4 2001:4:SWE:ARG:TVL:597:9
#> 36 317.643716 2007 56.6233 20.5583 4 2007:4:LAT:BALL:TVL:2:16
#> 37 149.904232 2011 57.0433 21.0300 4 2011:4:LAT:BALL:TVL:2:13
#> 38 2886.907131 2008 55.1000 20.1500 4 2008:4:RUS:ATLD:TVL:51:32
#> 39 26.422042 2001 57.8727 19.4198 4 2001:4:SWE:ARG:TVL:585:3
#> 40 759.016698 2015 55.6842 14.5025 4 2015:4:SWE:DAN2:TVL:4:2
#> 41 446.869160 2011 56.6350 20.7333 4 2011:4:LAT:BALL:TVL:2:7
#> 42 133.605963 2009 57.4180 17.0216 4 2009:4:SWE:ARG:TVL:714:2
#> 43 234.058199 1999 54.7667 13.6667 4 1999:4:GFR:SOL:H20:37:32
#> 44 719.829357 2011 56.6100 20.6367 4 2011:4:LAT:BALL:TVL:2:6
#> 45 186.121205 2017 57.3617 21.2550 4 2017:4:LAT:BALL:TVL:2:1
#> 46 104.111882 2018 54.4033 19.2700 4 2018:4:POL:BAL:TVL:26216:12
#> 47 50.550152 2015 57.4283 21.2833 4 2015:4:LAT:BALL:TVL:2:8
#> 48 92.484410 2008 57.7783 21.4117 4 2008:4:LAT:BALL:TVL:2:2
#> 49 234.776192 2017 57.2183 21.1217 4 2017:4:LAT:BALL:TVL:2:3
#> 50 45.626984 1995 57.1647 18.8183 4 1995:4:SWE:ARG:FOT:256:11
#> IDx ices_rect SD density_fle
#> 1 2010.4.LAT.67BC.TVL.2.6 43H1 28 6723.2629
#> 2 2007.4.SWE.77AR.TVL.689.14 43G8 28 3251.8846
#> 3 2012.4.LAT.67BC.TVL.2.6 43H1 28 2744.7969
#> 4 2019.4.POL.67BC.TVL.26256.4 37G9 26 2566.4037
#> 5 2009.4.SWE.77AR.TVL.723.11 44G9 28 2518.0079
#> 6 2012.4.LAT.67BC.TVL.2.7 43H1 28 2483.0721
#> 7 2001.4.DEN.26D4.TVL.93.38 41G8 26 2154.9624
#> 8 2019.4.SWE.77SE.TVL.66.2 39G4 24 2085.8337
#> 9 2008.4.SWE.77AR.TVL.697.12 43G8 28 2067.7463
#> 10 2011.4.LAT.67BC.TVL.2.11 43H1 28 2058.3928
#> 11 2008.4.LAT.67BC.TVL.2.3 43H1 28 1910.8130
#> 12 2013.4.SWE.26D4.TVL.33.17 44G9 28 1738.9815
#> 13 2007.4.LAT.67BC.TVL.2.9 43H0 28 1717.2111
#> 14 2013.4.SWE.26D4.TVL.30.15 44G9 28 1694.3936
#> 15 2008.4.SWE.77AR.TVL.710.16 43G8 28 1654.8391
#> 16 2017.4.LAT.67BC.TVL.2.2 43H1 28 1600.2680
#> 17 2008.4.SWE.77AR.TVL.696.11 43G8 28 1584.2860
#> 18 2002.4.DEN.26D4.TVL.29.14 41H0 26 1583.0442
#> 19 2007.4.LAT.67BC.TVL.2.15 42H0 28 1576.0516
#> 20 2011.4.RUS.RUNT.TVL.62.2 39H0 26 1567.9321
#> 21 2007.4.SWE.77AR.TVL.687.12 43G8 28 1487.4590
#> 22 2017.4.POL.67BC.TVL.26280.41 38G8 26 1465.8711
#> 23 2006.4.LAT.67BC.TVL.2.14 42H0 28 1357.7216
#> 24 2010.4.LAT.67BC.TVL.2.5 43H1 28 1292.5683
#> 25 2011.4.LAT.67BC.TVL.2.12 43H0 28 1290.7944
#> 26 2018.4.POL.67BC.TVL.26163.11 37G9 26 1273.8596
#> 27 2004.4.SWE.77AR.TVL.726.5 44G9 28 1224.8131
#> 28 2000.4.SWE.77AR.GOV.606.38 43G8 28 1209.4399
#> 29 2008.4.LAT.67BC.TVL.2.26 40H0 26 1184.3045
#> 30 2006.4.LAT.67BC.TVL.2.9 43H1 28 1173.8761
#> 31 2007.4.LAT.67BC.TVL.2.11 43H0 28 1170.8015
#> 32 2008.4.POL.67BC.TVL.26219.20 37G8 26 1156.1423
#> 33 2006.4.LAT.67BC.TVL.2.15 42H0 28 1143.9646
#> 34 2008.4.LAT.67BC.TVL.2.12 42H0 28 1136.8738
#> 35 2001.4.SWE.77AR.TVL.597.9 43G6 27 1115.8434
#> 36 2007.4.LAT.67BC.TVL.2.16 42H0 28 1083.7434
#> 37 2011.4.LAT.67BC.TVL.2.13 43H1 28 1071.6831
#> 38 2008.4.RUS.RUJB.TVL.51.32 39H0 26 1070.0434
#> 39 2001.4.SWE.77AR.TVL.585.3 44G9 28 1068.8500
#> 40 2015.4.SWE.26D4.TVL.4.2 40G4 25 1068.2623
#> 41 2011.4.LAT.67BC.TVL.2.7 42H0 28 1041.5374
#> 42 2009.4.SWE.77AR.TVL.714.2 43G7 27 1028.1830
#> 43 1999.4.GFR.06S1.H20.37.32 38G3 24 1016.2832
#> 44 2011.4.LAT.67BC.TVL.2.6 42H0 28 1009.6423
#> 45 2017.4.LAT.67BC.TVL.2.1 43H1 28 1004.9214
#> 46 2018.4.POL.67BC.TVL.26216.12 37G9 26 991.3600
#> 47 2015.4.LAT.67BC.TVL.2.8 43H1 28 985.0383
#> 48 2008.4.LAT.67BC.TVL.2.2 44H1 28 984.6904
#> 49 2017.4.LAT.67BC.TVL.2.3 43H1 28 956.4140
#> 50 1995.4.SWE.77AR.FOT.256.11 43G8 28 909.9229
# Filter unrealistically large densities
dat <- dat %>% filter(density_fle < 10000)
ggplot(dat, aes(density, density_fle)) + geom_point()
# Read Orio data first for comparison:
# Keep non-Swedish hauls for the comparison (presumably because Orio's
# series covers those - TODO confirm)
test_cod <- hlcodhaul %>% filter(!Country == "SWE")
test_fle <- hlflehaul %>% filter(!Country == "SWE")
# NOTE(review): hard-coded absolute paths - these lines only run on the
# author's machine; consider moving the files into data/ and using
# relative paths
orio_cod <- read.csv("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/ale_gear_standardization/datras_st_ale.csv")
orio_fle <- read.csv("/Users/maxlindmark/Desktop/R_STUDIO_PROJECTS/ale_gear_standardization/datras_fle_st_ale.csv")
# Orio's files hold one row per length class; sum to haul level (IDX),
# keeping one row per haul. Shared helper replaces the duplicated
# cod/flounder pipelines.
sum_orio_per_haul <- function(d) {
  d %>%
    group_by(IDX) %>%
    mutate(haul_cpue_kg = sum(CPUEstBIOyear),
           haul_cpue = sum(CPUEst),
           haul_cpue_kg_un = sum(CPUEunBIOyear),
           haul_cpue_un = sum(CPUEun)) %>%
    ungroup() %>%
    distinct(IDX, .keep_all = TRUE)
}
orio_cod <- sum_orio_per_haul(orio_cod)
orio_fle <- sum_orio_per_haul(orio_fle)
# hlcodhaul %>% group_by(IDx) %>% mutate(n = n()) %>% ungroup() %>% distinct(n)
# Standardize my data for easier plotting. Shared helper replaces the
# duplicated cod/flounder pipelines; years restricted to 1993-2016 for the
# comparison with Orio's series.
prep_mine <- function(d, sp) {
  d %>%
    dplyr::select(haul_cpue_kg, haul_cpue, haul_cpue_kg_un, haul_cpue_un, Year, Ship3, Country, Gear) %>%
    mutate(source = "Max",
           species = sp) %>%
    rename("Ship" = "Ship3") %>%
    filter(Year > 1992 & Year < 2017)
}
test_cod <- prep_mine(test_cod, "Cod")
test_fle <- prep_mine(test_fle, "Fle")
# Same standardization for Orio's data. Haul metadata (year, quarter,
# country, gear) is parsed out of the fixed-position IDX string; biomass is
# converted g -> kg. Shared helper replaces two ~20-line duplicated
# pipelines.
# NOTE(review): both species are restricted to the cod comparison countries
# (unique(test_cod$Country)), as in the original - verify this is intended
# for flounder too.
prep_orio <- function(d, sp) {
  d %>%
    dplyr::select(haul_cpue_kg, haul_cpue, haul_cpue_kg_un, haul_cpue_un, year, vessel, IDX) %>%
    mutate(source = "Ale",
           species = sp) %>%
    rename("Year" = "year",
           "Ship" = "vessel") %>%
    # Two working copies of IDX, consumed by the separate() calls below
    mutate(IDX2 = IDX,
           IDX3 = IDX) %>%
    separate(IDX, sep = c(5:7), into = c("temp_year", "Quarter")) %>%
    separate(temp_year, sep = c(4:5), into = c("Year", "scrap")) %>%
    separate(IDX2, sep = 7, into = c("scrap2", "Country")) %>%
    separate(Country, sep = 3, into = c("Country", "scrap3")) %>%
    separate(IDX3, sep = 15, into = c("scrap4", "Gear")) %>%
    separate(Gear, sep = 3, into = c("Gear", "scrap5")) %>%
    dplyr::select(-scrap, -scrap2, -scrap3, -scrap4, -scrap5) %>%
    filter(Quarter == 4) %>%
    mutate(Year = as.integer(Year)) %>%
    # g -> kg
    mutate(haul_cpue_kg = haul_cpue_kg/1000,
           haul_cpue_kg_un = haul_cpue_kg_un/1000) %>%
    filter(Year > 1992) %>%
    filter(Country %in% unique(test_cod$Country))
}
orio_cod <- prep_orio(orio_cod, "Cod")
orio_fle <- prep_orio(orio_fle, "Fle")
# sort(unique(test_cod$Country))
# sort(unique(orio_cod$Country))
# Check proportions of zero catches in each series
t <- filter(test_cod, haul_cpue_kg > 0)
t <- filter(orio_cod, haul_cpue_kg > 0)
t <- filter(test_fle, haul_cpue_kg > 0)
t <- filter(orio_fle, haul_cpue_kg > 0)
# Combine the four series for plotting
test_full <- bind_rows(orio_fle, orio_cod, test_cod, test_fle)
# Compare my standardization against Ale's (Orio et al.) for both species.
# The thirteen plots below were near-identical copy-pasted pipelines that
# differed only in species, variable and grouping; three shared helpers
# replace them.
# Dodged points of a CPUE variable per year, colored by source
plot_points <- function(sp, var) {
  test_full %>%
    filter(species == sp) %>%
    ggplot(aes(factor(Year), .data[[var]], color = source)) +
    geom_point(size = 1, position = position_dodge(width = 0.5)) +
    theme(axis.text.x = element_text(angle = 90))
}
# Per-year histograms of a CPUE variable, colored by source
plot_hist <- function(sp, var) {
  test_full %>%
    filter(species == sp) %>%
    ggplot(aes(.data[[var]], fill = source)) +
    geom_histogram(position = position_dodge()) +
    facet_wrap(~Year, scales = "free") +
    theme(axis.text.x = element_text(angle = 90))
}
# Yearly mean of a CPUE variable as lines by source; optionally restricted
# to non-zero catches, or faceted by country (the by-country version has no
# points, matching the original plots)
plot_mean <- function(sp, var, positive_only = FALSE, by_country = FALSE) {
  d <- test_full %>% filter(species == sp)
  if (positive_only) d <- filter(d, .data[[var]] > 0)
  groups <- if (by_country) c("Year", "source", "Country") else c("Year", "source")
  p <- d %>%
    group_by(across(all_of(groups))) %>%
    summarise(mean_cpue = mean(.data[[var]]),
              sd_cpue = sd(.data[[var]])) %>%
    ggplot(aes(Year, mean_cpue, linetype = source, color = source)) +
    geom_line(size = 1.1)
  if (by_country) p + facet_wrap(~Country) else p + geom_point(size = 3)
}
# Check the non-standardized data for cod
# Raw abundance cpue
plot_points("Cod", "haul_cpue_un")
plot_hist("Cod", "haul_cpue_un")
# Mean abundance cpue
plot_mean("Cod", "haul_cpue_un")
# Mean abundance cpue by country
plot_mean("Cod", "haul_cpue_un", by_country = TRUE)
# Mean abundance cpue for non-zero catches
plot_mean("Cod", "haul_cpue_un", positive_only = TRUE)
# Now plot corrected biomass cpue
plot_points("Cod", "haul_cpue_kg")
# Mean corrected biomass cpue
plot_mean("Cod", "haul_cpue_kg")
# Now check in on flounder
# Raw abundance cpue
plot_points("Fle", "haul_cpue_un")
plot_hist("Fle", "haul_cpue_un")
# Mean abundance cpue
plot_mean("Fle", "haul_cpue_un")
# Ok, here we can see that Ale predicts an increase earlier, which we also
# saw on the plot of raw catches as points.
# Mean abundance cpue by country, to see how this can arise:
plot_mean("Fle", "haul_cpue_un", by_country = TRUE)
# Ok, after checking, the reason I don't have the older LAT data is because
# I don't have RS or RSA-values for those hauls, even though the catch data
# are in datras... Should probably check with Ale how he corrected those!
# Now plot corrected biomass cpue
plot_points("Fle", "haul_cpue_kg")
# Mean corrected biomass cpue
plot_mean("Fle", "haul_cpue_kg")
# Add depth: read the bathymetry tifs (west + east) and merge into one raster
west <- raster("data/depth_geo_tif/D5_2018_rgb-1.tif")
#plot(west)
east <- raster("data/depth_geo_tif/D6_2018_rgb-1.tif")
# plot(east)
dep_rast <- raster::merge(west, east)
# Extract the raster value at each haul position. Select the coordinate
# columns by name instead of position (dat[, 4:3]), which silently breaks if
# the column order changes upstream; also qualify raster::extract, since
# tidyr also exports extract().
dat$depth <- raster::extract(dep_rast, dat[, c("lon", "lat")])
ggplot(dat, aes(depth)) + geom_histogram()
# Convert elevation to depth (positive downwards), anchored at the maximum
# (shallowest) extracted value. max(..., na.rm = TRUE) replaces
# max(drop_na(dat)$depth), which dropped rows with NA in *any* column
# (equivalent here - tidylog reported "no rows removed" - but fragile).
dat$depth <- (dat$depth - max(dat$depth, na.rm = TRUE)) * -1
ggplot(dat, aes(depth)) + geom_histogram()
# Downloaded from here: https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=BALTICSEA_REANALYSIS_BIO_003_012
# Extract raster points: https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
# https://rpubs.com/boyerag/297592
# https://pjbartlein.github.io/REarthSysSci/netCDF.html#get-a-variable
# Open the netCDF file with monthly mean sea-floor oxygen ("o2b") from the
# CMEMS Baltic Sea SCOBI reanalysis
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc")
# Print its metadata (variables, dimensions, global attributes); output below
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1610091357600.nc (NC_FORMAT_CLASSIC):
#>
#> 1 variables (excluding dimension variables):
#> float o2b[longitude,latitude,time]
#> long_name: Sea_floor_Dissolved_Oxygen_Concentration
#> missing_value: NaN
#> standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
#> units: mmol m-3
#> _FillValue: NaN
#> _ChunkSizes: 1
#> _ChunkSizes: 523
#> _ChunkSizes: 383
#>
#> 3 dimensions:
#> time Size:324
#> axis: T
#> long_name: Validity time
#> standard_name: time
#> units: days since 1950-01-01 00:00:00
#> calendar: gregorian
#> _ChunkSizes: 512
#> _CoordinateAxisType: Time
#> valid_min: 15721.5
#> valid_max: 25551.5
#> latitude Size:166
#> axis: Y
#> standard_name: latitude
#> long_name: latitude
#> units: degrees_north
#> _CoordinateAxisType: Lat
#> valid_min: 52.991626739502
#> valid_max: 58.4915390014648
#> longitude Size:253
#> standard_name: longitude
#> long_name: longitude
#> units: degrees_east
#> axis: X
#> _CoordinateAxisType: Lon
#> valid_min: 9.01375484466553
#> valid_max: 23.013614654541
#>
#> 24 global attributes:
#> references: http://www.smhi.se
#> institution: Swedish Meterological and Hydrological Institute
#> history: See source and creation_date attributees
#> Conventions: CF-1.5
#> contact: servicedesk_cmems@mercator-ocean.eu
#> comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#> bullentin_type: reanalysis
#> cmems_product_id: BALTICSEA_REANALYSIS_BIO_003_012
#> title: CMEMS V4 Reanalysis: SCOBI model 3D fields (monthly means)
#> FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#> FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#> FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#> FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#> shallowest_depth: 1.50136542320251
#> deepest_depth: 711.059204101562
#> source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#> file_quality_index: 1
#> creation_date: 2020-11-16 UTC
#> bullentin_date: 20191201
#> start_date: 2019-12-01 UTC
#> stop_date: 2019-12-01 UTC
#> start_time: 00:00 UTC
#> stop_time: 00:00 UTC
#> _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention
# Get longitude and latitude coordinate vectors of the oxygen grid
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)  # 253 cells west-east (per the file header above)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530
lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)  # 166 cells south-north
head(lat)
#> [1] 52.99163 53.02496 53.05829 53.09163 53.12496 53.15829
# Get time (days since 1950-01-01, one value per month)
time <- ncvar_get(ncin,"time")
time
#> [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#> [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#> [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#> [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#> [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#> [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#> [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#> [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#> [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#> [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#> [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
tunits <- ncatt_get(ncin,"time","units")  # time-origin string, parsed below
nt <- dim(time)  # number of monthly time steps
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#>
#> $value
#> [1] "days since 1950-01-01 00:00:00"
# Get oxygen: read the bottom-oxygen variable, its attributes and fill value
dname <- "o2b"
oxy_array <- ncvar_get(ncin,dname)  # array dimensions (lon, lat, time)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")  # mmol m-3 (converted to ml/l later)
fillvalue <- ncatt_get(ncin,dname,"_FillValue")  # NaN according to the header
dim(oxy_array)
#> [1] 253 166 324
# Get global attributes (kept for reference; not used downstream here)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")
# Convert time: split the time units string ("days since 1950-01-01 00:00:00")
# into its origin date fields
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])
# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))
# Crop the date variable: chron prints as "(mm/dd/yy ...)", so characters 2-3
# are the month and 8-9 the two-digit year. The >90 window maps 91-99 to the
# 1900s and everything else to the 2000s, which holds for this reanalysis.
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
years <- ifelse(years > 90, 1900 + years, 2000 + years)
# Replace netCDF fill values with NA's. The original comparison
# `oxy_array == fillvalue$value` silently replaced nothing when the fill
# value is NaN (any comparison with NaN yields NA, and an all-NA logical
# index assigns nothing), so test for NaN explicitly.
if (is.nan(fillvalue$value)) {
  oxy_array[is.nan(oxy_array)] <- NA
} else {
  oxy_array[oxy_array == fillvalue$value] <- NA
}
# We only use Quarter 4 in this analysis, so now we want to loop through each time step,
# and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4
# Inspect the month of each time step (cycles 1..12 per year; output below)
months
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [26] 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2
#> [51] 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3
#> [76] 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4
#> [101] 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5
#> [126] 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6
#> [151] 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7
#> [176] 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8
#> [201] 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9
#> [226] 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10
#> [251] 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11
#> [276] 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
#> [301] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
# Indices of the Q4 months (October-December)
index_keep <- which(months > 9)
oxy_q4 <- oxy_array[, , index_keep]
months_keep <- months[index_keep]
years_keep <- years[index_keep]
# The Q4 layers come in chronological triplets (Oct, Nov, Dec per year), so
# average each consecutive triplet into one annual Q4 mean field.
# Starting index of each year's triplet along the 3rd dimension:
loop_seq <- seq(1, dim(oxy_q4)[3], by = 3)
# One list element per year, filled in below
dlist <- list()
for (j in seq_along(loop_seq)) {
  i <- loop_seq[j]
  # Element-wise mean of the three monthly fields; cells that are NA in any
  # month stay NA, matching the original (x + NA) / 3 arithmetic.
  dlist[[j]] <- (oxy_q4[, , i] + oxy_q4[, , i + 1] + oxy_q4[, , i + 2]) / 3
  # Note: the original computed the list position as ((i/3) - (1/3)) + 1,
  # which relies on floating-point division landing exactly on an integer
  # ([[ ]] truncates doubles); seq_along() gives the same 1..n positions
  # without that risk.
}
# Now name the lists with the year:
names(dlist) <- unique(years_keep)
# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script
# Filter years in the cpue data frame to only have the years I have oxygen for
# (this drops the 1992 hauls; the reanalysis starts in 1993 — see the
# unmatched-id check further below)
d_sub_oxy <- dat %>% filter(year %in% names(dlist)) %>% droplevels()
#> filter: removed 31 rows (1%), 3,453 rows remaining
# Create data holding object
data_list <- list()
# ... And for the oxygen raster
raster_list <- list()
# Create factor year for indexing the list in the loop
d_sub_oxy$year_f <- as.factor(d_sub_oxy$year)
# Loop through each year: rasterize that year's Q4 oxygen field, extract the
# value at each haul position, and save a check-map of points over the raster.
for (i in unique(d_sub_oxy$year_f)) {
  # Set plot limits
  ymin = 54; ymax = 58; xmin = 12; xmax = 22
  # Subset a year
  oxy_slice <- dlist[[i]]
  # Create raster for that year (i). The netCDF array is (lon, lat), so
  # transpose to raster's (row = lat, col = lon) layout.
  r <- raster(t(oxy_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
  # Flip so latitude increases northwards as expected
  r <- flip(r, direction = 'y')
  #plot(r, main = i)
  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice <- d_sub_oxy %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  # Make into a SpatialPoints object
  data_sp <- SpatialPoints(d_slice)
  # Extract raster value (oxygen) at each haul position
  rasValue <- raster::extract(r, data_sp)
  # Add in the raster value in the df holding the coordinates for the cpue data
  d_slice$oxy <- rasValue
  # Add in which year
  d_slice$year <- i
  # Store each year at position 1..n(years). Use the year's position in
  # dlist instead of the original hard-coded `as.numeric(year) - 1992`,
  # which silently breaks if the year range ever changes.
  index <- match(as.character(i), names(dlist))
  data_list[[index]] <- d_slice
  # Convert the raster to points so the check-map can be drawn with ggplot
  map.p <- rasterToPoints(r)
  # Make the points a dataframe for ggplot
  df_rast <- data.frame(map.p)
  # Rename y-variable and add year
  df_rast <- df_rast %>% rename("oxy" = "layer") %>% mutate(year = i)
  # Add each years' raster data frame in the list
  raster_list[[index]] <- df_rast
  # Plotting copy with descriptive coordinate names. (The original assigned a
  # 3-element colnames vector to the 4-column frame, leaving the year column
  # with an NA name; renaming only x/y avoids that.)
  plot_df <- df_rast %>% rename(Longitude = x, Latitude = y)
  # Build the check map, assign it, and pass it to ggsave explicitly. Inside
  # a loop the plot is never auto-printed, so the original bare ggsave()
  # (which defaults to last_plot(), set only when a plot is printed) saved
  # the most recently *printed* plot for every year, not the map built here.
  p <- ggplot(data = plot_df, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = oxy)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = oxy),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = F, size = 0.2) +
    coord_sf(xlim = c(xmin, xmax),
             ylim = c(ymin, ymax)) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(-200, 400)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(-200, 400)) +
    NULL
  ggsave(paste("figures/supp/cpue_oxygen_rasters/", i,".png", sep = ""),
         plot = p, width = 6.5, height = 6.5, dpi = 600)
}
#> filter: removed 3,392 rows (98%), 61 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,391 rows (98%), 62 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,400 rows (98%), 53 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,392 rows (98%), 61 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,378 rows (98%), 75 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,385 rows (98%), 68 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,359 rows (97%), 94 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,364 rows (97%), 89 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,338 rows (97%), 115 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,336 rows (97%), 117 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,350 rows (97%), 103 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,368 rows (98%), 85 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,332 rows (96%), 121 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,303 rows (96%), 150 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,291 rows (95%), 162 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,277 rows (95%), 176 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,297 rows (95%), 156 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,291 rows (95%), 162 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,273 rows (95%), 180 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,320 rows (96%), 133 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,305 rows (96%), 148 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,277 rows (95%), 176 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,287 rows (95%), 166 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,240 rows (94%), 213 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,228 rows (93%), 225 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,243 rows (94%), 210 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,361 rows (97%), 92 rows remaining
#> rename: renamed one variable (oxy)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
# Now create a data frame from the list of all annual values
# (one row per haul with its extracted oxygen value)
big_dat_oxy <- dplyr::bind_rows(data_list)
# ... and one stacked data frame of the full yearly oxygen rasters
big_raster_dat_oxy <- dplyr::bind_rows(raster_list)
# Annual mean bottom oxygen over the whole raster: large inter-annual
# variation but a negative trend over time.
annual_mean_oxy <- big_raster_dat_oxy %>%
  group_by(year) %>%
  drop_na(oxy) %>%
  summarise(mean_oxy = mean(oxy)) %>%
  mutate(year_num = as.numeric(year))
ggplot(annual_mean_oxy, aes(year_num, mean_oxy)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm")
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'
# Number of raster cells per year with negative oxygen ("dead" bottom water),
# plotted against year. Note: n is attached to every matching row via
# mutate, so each year plots many identical overlaid points.
dead_cells <- big_raster_dat_oxy %>%
  group_by(year) %>%
  drop_na(oxy) %>%
  mutate(dead = ifelse(oxy < 0, "Y", "N")) %>%
  filter(dead == "Y") %>%
  mutate(n = n(),
         year_num = as.numeric(year))
ggplot(dead_cells, aes(year_num, n)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm")
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> mutate (grouped): new variable 'dead' (character) with 2 unique values and 0% NA
#> filter (grouped): removed 437,238 rows (93%), 31,671 rows remaining
#> mutate (grouped): new variable 'n' (integer) with 27 unique values and 0% NA
#> new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'
# Now add in the new oxygen column in the original data:
# Sanity-check the two structures before joining (output below)
str(d_sub_oxy)
#> tibble [3,453 × 12] (S3: tbl_df/tbl/data.frame)
#> $ density : num [1:3453] 71 144.28 850.94 19.18 4.24 ...
#> $ year : int [1:3453] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#> $ lat : num [1:3453] 57.5 57.5 54.7 57.1 57.8 ...
#> $ lon : num [1:3453] 17.1 17.6 13.7 18.9 19.5 ...
#> $ quarter : int [1:3453] 4 4 4 4 4 4 4 4 4 4 ...
#> $ haul.id : chr [1:3453] "1993:4:SWE:ARG:FOT:262:31" "1993:4:SWE:ARG:FOT:263:32" "1993:4:GFR:SOL:H20:34:60" "1993:4:SWE:ARG:FOT:256:25" ...
#> $ IDx : chr [1:3453] "1993.4.SWE.77AR.FOT.262.31" "1993.4.SWE.77AR.FOT.263.32" "1993.4.GFR.06S1.H20.34.60" "1993.4.SWE.77AR.FOT.256.25" ...
#> $ ices_rect : chr [1:3453] "43G7" "43G7" "38G3" "43G8" ...
#> $ SD : chr [1:3453] "27" "27" "24" "28" ...
#> $ density_fle: num [1:3453] 172.6 12.8 226.1 25.4 46 ...
#> $ depth : num [1:3453] 73 88 20 75 86 10 8 7 32 45 ...
#> $ year_f : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
# ... and the haul-level oxygen frame built by the loop (output below)
str(big_dat_oxy)
#> tibble [3,453 × 4] (S3: tbl_df/tbl/data.frame)
#> $ lon : num [1:3453] 17.1 17.6 13.7 18.9 19.5 ...
#> $ lat : num [1:3453] 57.5 57.5 54.7 57.1 57.8 ...
#> $ oxy : num [1:3453] 340 276 302 328 333 ...
#> $ year: chr [1:3453] "1993" "1993" "1993" "1993" ...
# Create an ID for matching the oxygen data with the cpue data
# (year + exact lon/lat pasted as strings; this works because both frames
# carry identical numeric values, but would break if either were rounded)
dat$id_oxy <- paste(dat$year, dat$lon, dat$lat, sep = "_")
big_dat_oxy$id_oxy <- paste(big_dat_oxy$year, big_dat_oxy$lon, big_dat_oxy$lat, sep = "_")
# Which cpue ids have no oxygen match? (output below: all are 1992 hauls,
# the year before the reanalysis starts)
ids <- dat$id_oxy[!dat$id_oxy %in% c(big_dat_oxy$id_oxy)]
unique(ids)
#> [1] "1992_13.45_55.0167" "1992_13.1_54.9167" "1992_13.9667_54.9333"
#> [4] "1992_14.35_55.1" "1992_14.1667_55.1333" "1992_13.9_54.5"
#> [7] "1992_14.2_54.5" "1992_14.2833_55.1833" "1992_13.8_54.5167"
#> [10] "1992_14.3667_54.5167" "1992_13.8833_54.5667" "1992_14.1333_54.5667"
#> [13] "1992_13.8_54.6333" "1992_14.2167_54.6333" "1992_13.1167_54.65"
#> [16] "1992_13.65_54.6833" "1992_13.3333_54.7" "1992_14.1_54.7"
#> [19] "1992_13.1167_54.7167" "1992_13.55_54.7333" "1992_13.7_54.75"
#> [22] "1992_13.8167_54.75" "1992_13.4167_54.7833" "1992_13.2_54.8667"
#> [25] "1992_14.0333_54.8667" "1992_13.6333_55" "1992_13.7667_55.1"
#> [28] "1992_13.55_55.0333" "1992_13.1167_54.9833" "1992_14.3_55.0333"
#> [31] "1992_13.8667_55.1"
# Select only the columns we want to merge
big_dat_sub_oxy <- big_dat_oxy %>% dplyr::select(id_oxy, oxy)
# Remove duplicate ID (one oxy value per id) — presumably two hauls sharing
# year and coordinates; see the commented check below to verify
big_dat_sub_oxy2 <- big_dat_sub_oxy %>% distinct(id_oxy, .keep_all = TRUE)
#> distinct: removed 9 rows (<1%), 3,444 rows remaining
# big_dat_sub_oxy %>% group_by(id_oxy) %>% mutate(n = n()) %>% arrange(desc(n))
# Join the data with raster-derived oxygen with the full cpue data
dat <- left_join(dat, big_dat_sub_oxy2, by = "id_oxy")
#> left_join: added one column (oxy)
#> > rows only in x 31
#> > rows only in y ( 0)
#> > matched rows 3,453
#> > =======
#> > rows total 3,484
# Now the unit of oxygen is mmol/m3. I want it to be ml/L. The original model is in unit ml/L
# and it's been converted by the data host. Since it was converted without accounting for
# pressure or temperature, I can simply use the following conversion factor:
# 1 ml/l = 10^3/22.391 = 44.661 umol/l -> 1 ml/l = 0.044661 mmol/l = 44.661 mmol/m^3 -> 0.0223909 ml/l = 1 mmol/m^3
# https://ocean.ices.dk/tools/unitconversion.aspx
dat$oxy <- dat$oxy * 0.0223909
# Drop NA oxygen (hauls without an extracted value — presumably points that
# fall outside the model grid; verify against the check maps)
dat <- dat %>% drop_na(oxy)
#> drop_na: removed 37 rows (1%), 3,447 rows remaining
# Open the netCDF file with monthly mean sea-floor temperature ("bottomT")
# from the CMEMS Baltic Sea NEMO physics reanalysis
ncin <- nc_open("data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc")
# Print its metadata (note the finer 383 x 523 grid); output below
print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1608127623694.nc (NC_FORMAT_CLASSIC):
#>
#> 1 variables (excluding dimension variables):
#> float bottomT[longitude,latitude,time]
#> standard_name: sea_water_potential_temperature_at_sea_floor
#> units: degrees_C
#> long_name: Sea floor potential temperature
#> missing_value: NaN
#> _FillValue: NaN
#> _ChunkSizes: 1
#> _ChunkSizes: 523
#> _ChunkSizes: 383
#>
#> 3 dimensions:
#> time Size:324
#> axis: T
#> long_name: Validity time
#> standard_name: time
#> units: days since 1950-01-01 00:00:00
#> calendar: gregorian
#> _ChunkSizes: 512
#> _CoordinateAxisType: Time
#> valid_min: 15721.5
#> valid_max: 25551.5
#> latitude Size:523
#> axis: Y
#> standard_name: latitude
#> long_name: latitude
#> units: degrees_north
#> _CoordinateAxisType: Lat
#> valid_min: 48.49169921875
#> valid_max: 65.8914184570312
#> longitude Size:383
#> standard_name: longitude
#> long_name: longitude
#> units: degrees_east
#> axis: X
#> _CoordinateAxisType: Lon
#> valid_min: 9.01375484466553
#> valid_max: 30.2357654571533
#>
#> 24 global attributes:
#> references: http://www.smhi.se
#> institution: Swedish Meterological and Hydrological Institute
#> history: See source and creation_date attributees
#> Conventions: CF-1.5
#> contact: servicedesk_cmems@mercator-ocean.eu
#> comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#> bullentin_type: reanalysis
#> cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#> title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#> FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#> FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#> FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#> FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#> shallowest_depth: 1.50136542320251
#> deepest_depth: 711.059204101562
#> source: SMHI reanalysis run NORDIC-NS2_1d_20191201_20191202
#> file_quality_index: 1
#> creation_date: 2020-11-16 UTC
#> bullentin_date: 20191201
#> start_date: 2019-12-01 UTC
#> stop_date: 2019-12-01 UTC
#> start_time: 00:00 UTC
#> stop_time: 00:00 UTC
#> _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention
# Get longitude and latitude of the physics (NEMO) grid; per the header above
# it is finer than the oxygen grid and covers the whole model domain
lon <- ncvar_get(ncin,"longitude")
nlon <- dim(lon)  # 383 cells west-east
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530
lat <- ncvar_get(ncin,"latitude")
nlat <- dim(lat)  # 523 cells south-north
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836
# Get time (same monthly axis as the oxygen file)
time <- ncvar_get(ncin,"time")
time
#> [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#> [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#> [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#> [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#> [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#> [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#> [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#> [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#> [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#> [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#> [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
tunits <- ncatt_get(ncin,"time","units")  # time-origin string, parsed below
nt <- dim(time)  # number of monthly time steps
nt
#> [1] 324
tunits
#> $hasatt
#> [1] TRUE
#>
#> $value
#> [1] "days since 1950-01-01 00:00:00"
# Get temperature: read the sea-floor potential temperature variable
dname <- "bottomT"
temp_array <- ncvar_get(ncin,dname)  # array dimensions (lon, lat, time)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")  # degrees_C
fillvalue <- ncatt_get(ncin,dname,"_FillValue")  # NaN according to the header
dim(temp_array)
#> [1] 383 523 324
# Get global attributes (kept for reference; not used downstream here)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")
# Convert time: split the time units string ("days since 1950-01-01 00:00:00")
# into its origin date fields
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])
# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))
# Crop the date variable: chron prints as "(mm/dd/yy ...)", so characters 2-3
# are the month and 8-9 the two-digit year (91-99 -> 1900s, else 2000s).
months <- as.numeric(substr(dates, 2, 3))
years <- as.numeric(substr(dates, 8, 9))
years <- ifelse(years > 90, 1900 + years, 2000 + years)
# Replace netCDF fill values with NA's. `x == NaN` is never TRUE (it yields
# NA), so the original comparison against a NaN fill value replaced nothing;
# test for NaN explicitly, as in the oxygen section.
if (is.nan(fillvalue$value)) {
  temp_array[is.nan(temp_array)] <- NA
} else {
  temp_array[temp_array == fillvalue$value] <- NA
}
# We only use Quarter 4 in this analysis, so now we want to loop through each time step,
# and if it is a good month save it as a raster.
# First get the index of months that correspond to Q4
# Inspect the month of each time step (cycles 1..12 per year; output below)
months
#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1
#> [26] 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2
#> [51] 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3
#> [76] 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4
#> [101] 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5
#> [126] 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6
#> [151] 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7
#> [176] 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8
#> [201] 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9
#> [226] 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10
#> [251] 11 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11
#> [276] 12 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
#> [301] 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12
# Indices of the Q4 months (October-December)
index_keep <- which(months > 9)
# Quarter 4 by keeping months in index_keep
temp_q4 <- temp_array[, , index_keep]
months_keep <- months[index_keep]
years_keep <- years[index_keep]
# The Q4 layers come in chronological triplets (Oct, Nov, Dec per year), so
# average each consecutive triplet into one annual Q4 mean field.
# Starting index of each year's triplet along the 3rd dimension:
loop_seq <- seq(1, dim(temp_q4)[3], by = 3)
# One list element per year, filled in below
dlist <- list()
for (j in seq_along(loop_seq)) {
  i <- loop_seq[j]
  # Element-wise mean of the three monthly fields; cells that are NA in any
  # month stay NA, matching the original (x + NA) / 3 arithmetic.
  dlist[[j]] <- (temp_q4[, , i] + temp_q4[, , i + 1] + temp_q4[, , i + 2]) / 3
  # seq_along() gives exact integer list positions, avoiding the original
  # floating-point ((i/3) - (1/3)) + 1 index computation (as fixed in the
  # oxygen section).
}
# Now name the lists with the year:
names(dlist) <- unique(years_keep)
# Now I need to make a loop where I extract the raster value for each year...
# The cpue data is called dat so far in this script
# Filter years in the cpue data frame to only have the years I have temperature for
# (nothing is dropped: dat was already restricted to 1993+ in the oxygen step)
d_sub_temp <- dat %>% filter(year %in% names(dlist)) %>% droplevels()
#> filter: no rows removed
# Create data holding object (overwrites the oxygen-step lists, now merged)
data_list <- list()
# ... And for the temperature raster
raster_list <- list()
# Create factor year for indexing the list in the loop
d_sub_temp$year_f <- as.factor(d_sub_temp$year)
# Loop through each year: build that year's temperature raster, extract its
# values at the cpue haul positions, and save one check-plot per year.
for(i in unique(d_sub_temp$year_f)) {
  # This year's averaged Q4 temperature matrix
  temp_slice <- dlist[[i]]
  # Create a longlat raster for year i (the transpose + flip below puts
  # the matrix into north-up raster orientation)
  # NOTE: the proj string previously read "+no_defs+ towgs84=..." -- the
  # misplaced '+' made the towgs84 term unparseable; fixed here
  r <- raster(t(temp_slice), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"))
  # Flip...
  r <- flip(r, direction = 'y')
  #plot(r, main = i)
  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice <- d_sub_temp %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  # Make into a SpatialPoints object
  data_sp <- SpatialPoints(d_slice)
  # Extract raster value (temperature) at each haul position
  rasValue <- raster::extract(r, data_sp)
  # Add the extracted temperature and the year to the coordinate data
  # (the unused 'df <- as.data.frame(data_sp)' intermediate was removed)
  d_slice$temp <- rasValue
  d_slice$year <- i
  # Position of year i in the looped year vector: gives a continuous
  # 1:n_years list index even if the years are not consecutive
  # (replaces the hard-coded 'as.numeric(year)[1] - 1992')
  index <- match(i, unique(d_sub_temp$year_f))
  # Add each year's point data to the list
  data_list[[index]] <- d_slice
  # Convert the raster to points so that we can plot it with ggplot
  map.p <- rasterToPoints(r)
  # Make the points a dataframe for ggplot
  df_rast <- data.frame(map.p)
  # Rename the value column and add the year
  df_rast <- df_rast %>% rename("temp" = "layer") %>% mutate(year = i)
  # Add each year's raster data frame to the list
  raster_list[[index]] <- df_rast
  # Rename only the coordinate columns for plotting; the data frame also
  # carries 'year', so assigning a length-3 name vector (as the original
  # did) left the fourth column with an NA name
  df_rast <- df_rast %>% rename(Longitude = x, Latitude = y)
  # Now make the map: haul points (circles) over the temperature raster
  ggplot(data = df_rast, aes(y = Latitude, x = Longitude)) +
    geom_raster(aes(fill = temp)) +
    geom_point(data = d_slice, aes(x = lon, y = lat, fill = temp),
               color = "black", size = 5, shape = 21) +
    theme_bw() +
    geom_sf(data = world, inherit.aes = FALSE, size = 0.2) +
    coord_sf(xlim = c(min(dat$lon), max(dat$lon)),
             ylim = c(min(dat$lat), max(dat$lat))) +
    scale_colour_gradientn(colours = rev(terrain.colors(10)),
                           limits = c(2, 17)) +
    scale_fill_gradientn(colours = rev(terrain.colors(10)),
                         limits = c(2, 17)) +
    NULL
  # Save the per-year figure (ggsave picks up the plot built just above)
  ggsave(paste0("figures/supp/cpue_temp_rasters/", i, ".png"),
         width = 6.5, height = 6.5, dpi = 600)
}
#> filter: removed 3,387 rows (98%), 60 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,385 rows (98%), 62 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,394 rows (98%), 53 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,386 rows (98%), 61 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,372 rows (98%), 75 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,379 rows (98%), 68 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,353 rows (97%), 94 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,358 rows (97%), 89 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,332 rows (97%), 115 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,330 rows (97%), 117 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,344 rows (97%), 103 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,362 rows (98%), 85 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,326 rows (96%), 121 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,297 rows (96%), 150 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,285 rows (95%), 162 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,272 rows (95%), 175 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,293 rows (96%), 154 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,286 rows (95%), 161 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,267 rows (95%), 180 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,315 rows (96%), 132 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,299 rows (96%), 148 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,271 rows (95%), 176 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,281 rows (95%), 166 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,234 rows (94%), 213 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,222 rows (93%), 225 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,237 rows (94%), 210 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
#> filter: removed 3,355 rows (97%), 92 rows remaining
#> rename: renamed one variable (temp)
#> mutate: new variable 'year' (character) with one unique value and 0% NA
# Now create a data frame from the list of all annual values
big_dat_temp <- dplyr::bind_rows(data_list)
big_raster_dat_temp <- dplyr::bind_rows(raster_list)
# Sanity check: the range of the extracted temperatures (max then min)
big_dat_temp %>% drop_na(temp) %>% summarise(max = max(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#> max
#> <dbl>
#> 1 14.5
big_dat_temp %>% drop_na(temp) %>% summarise(min = min(temp))
#> drop_na: no rows removed
#> summarise: now one row and one column, ungrouped
#> # A tibble: 1 x 1
#> min
#> <dbl>
#> 1 3.53
# Plot data, looks like there's big inter-annual variation but a positive trend
# (mean Q4 temperature per year across the whole raster, with a linear fit)
big_raster_dat_temp %>%
group_by(year) %>%
drop_na(temp) %>%
summarise(mean_temp = mean(temp)) %>%
mutate(year_num = as.numeric(year)) %>%
ggplot(., aes(year_num, mean_temp)) +
geom_point(size = 2) +
stat_smooth(method = "lm") +
NULL
#> group_by: one grouping variable (year)
#> drop_na (grouped): no rows removed
#> summarise: now 27 rows and 2 columns, ungrouped
#> mutate: new variable 'year_num' (double) with 27 unique values and 0% NA
#> `geom_smooth()` using formula 'y ~ x'
# Now add in the new temperature column in the original data:
# Inspect both data frames before joining (structure check only)
str(d_sub_temp)
#> tibble [3,447 × 14] (S3: tbl_df/tbl/data.frame)
#> $ density : num [1:3447] 71 144.28 850.94 19.18 4.24 ...
#> $ year : int [1:3447] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
#> $ lat : num [1:3447] 57.5 57.5 54.7 57.1 57.8 ...
#> $ lon : num [1:3447] 17.1 17.6 13.7 18.9 19.5 ...
#> $ quarter : int [1:3447] 4 4 4 4 4 4 4 4 4 4 ...
#> $ haul.id : chr [1:3447] "1993:4:SWE:ARG:FOT:262:31" "1993:4:SWE:ARG:FOT:263:32" "1993:4:GFR:SOL:H20:34:60" "1993:4:SWE:ARG:FOT:256:25" ...
#> $ IDx : chr [1:3447] "1993.4.SWE.77AR.FOT.262.31" "1993.4.SWE.77AR.FOT.263.32" "1993.4.GFR.06S1.H20.34.60" "1993.4.SWE.77AR.FOT.256.25" ...
#> $ ices_rect : chr [1:3447] "43G7" "43G7" "38G3" "43G8" ...
#> $ SD : chr [1:3447] "27" "27" "24" "28" ...
#> $ density_fle: num [1:3447] 172.6 12.8 226.1 25.4 46 ...
#> $ depth : num [1:3447] 73 88 20 75 86 10 8 7 32 45 ...
#> $ id_oxy : chr [1:3447] "1993_17.0833_57.4716" "1993_17.555_57.4767" "1993_13.65_54.6833" "1993_18.8633_57.0817" ...
#> $ oxy : num [1:3447] 7.62 6.18 6.75 7.34 7.45 ...
#> $ year_f : Factor w/ 27 levels "1993","1994",..: 1 1 1 1 1 1 1 1 1 1 ...
str(big_dat_temp)
#> tibble [3,447 × 4] (S3: tbl_df/tbl/data.frame)
#> $ lon : num [1:3447] 17.1 17.6 13.7 18.9 19.5 ...
#> $ lat : num [1:3447] 57.5 57.5 54.7 57.1 57.8 ...
#> $ temp: num [1:3447] 5.71 4.27 8.15 4.1 4.23 ...
#> $ year: chr [1:3447] "1993" "1993" "1993" "1993" ...
# Create an ID (year_lon_lat) for matching the temperature data with the cpue data
dat$id_temp <- paste(dat$year, dat$lon, dat$lat, sep = "_")
big_dat_temp$id_temp <- paste(big_dat_temp$year, big_dat_temp$lon, big_dat_temp$lat, sep = "_")
# Which id's are not in the cpue data (dat)? (It's because I don't have those years, not about the location)
ids <- dat$id_temp[!dat$id_temp %in% c(big_dat_temp$id_temp)]
unique(ids)
#> character(0)
# Select only the columns we want to merge
big_dat_sub_temp <- big_dat_temp %>% dplyr::select(id_temp, temp)
# Remove duplicate ID (one temp value per id)
big_dat_sub_temp2 <- big_dat_sub_temp %>% distinct(id_temp, .keep_all = TRUE)
#> distinct: removed 9 rows (<1%), 3,438 rows remaining
# Join the data with raster-derived temperature with the full cpue data
dat <- left_join(dat, big_dat_sub_temp2, by = "id_temp")
#> left_join: added one column (temp)
#> > rows only in x 0
#> > rows only in y ( 0)
#> > matched rows 3,447
#> > =======
#> > rows total 3,447
colnames(dat)
#> [1] "density" "year" "lat" "lon" "quarter"
#> [6] "haul.id" "IDx" "ices_rect" "SD" "density_fle"
#> [11] "depth" "id_oxy" "oxy" "id_temp" "temp"
# Drop the matching-id helper columns now that the joins are done
dat <- dat %>% dplyr::select(-id_temp, -id_oxy)
# Drop NA temp (hauls without a temperature raster value)
dat <- dat %>% drop_na(temp)
#> drop_na: no rows removed
# First add UTM coords
# Convert vectors of WGS84 longitude (x) and latitude (y) to UTM
# coordinates in metres for the given zone number (e.g. 33).
# Returns a data frame with columns ID, X and Y.
LongLatToUTM <- function(x, y, zone){
  xy <- data.frame(ID = seq_along(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84") ## for example
  # BUG FIX: the target proj string used to read "... ellps=WGS84" with a
  # missing '+', so the ellipsoid term was silently ignored (hence the
  # 'Discarded datum Unknown' warning); specify the datum explicitly
  res <- spTransform(xy, CRS(paste0("+proj=utm +zone=", zone, " +datum=WGS84")))
  return(as.data.frame(res))
}
# Project the haul coordinates to UTM zone 33
utm_coords <- LongLatToUTM(dat$lon, dat$lat, zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
# Convert metres to kilometres (for computational reasons in sdmTMB)
dat[["X"]] <- utm_coords[["X"]] / 1000
dat[["Y"]] <- utm_coords[["Y"]] / 1000
# Write the final cpue data with covariates for the analysis scripts
write.csv(dat, file = "data/for_analysis/mdat_cpue.csv", row.names = FALSE)